net/e1000: convert to new Rx offloads API
drivers/net/e1000/igb_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <inttypes.h>
14
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_pci.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
25 #include <rte_eal.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
32 #include <rte_mbuf.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev_driver.h>
35 #include <rte_prefetch.h>
36 #include <rte_udp.h>
37 #include <rte_tcp.h>
38 #include <rte_sctp.h>
39 #include <rte_net.h>
40 #include <rte_string_fns.h>
41
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
45
46 #ifdef RTE_LIBRTE_IEEE1588
47 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
48 #else
49 #define IGB_TX_IEEE1588_TMST 0
50 #endif
51 /* Bit mask to indicate which bits are required for building the TX context */
52 #define IGB_TX_OFFLOAD_MASK (                    \
53                 PKT_TX_VLAN_PKT |                \
54                 PKT_TX_IP_CKSUM |                \
55                 PKT_TX_L4_MASK |                 \
56                 PKT_TX_TCP_SEG |                 \
57                 IGB_TX_IEEE1588_TMST)
58
59 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
60                 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
61
62 /**
63  * Structure associated with each descriptor of the RX ring of an RX queue.
64  */
65 struct igb_rx_entry {
66         struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
67 };
68
69 /**
70  * Structure associated with each descriptor of the TX ring of a TX queue.
71  */
72 struct igb_tx_entry {
73         struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
74         uint16_t next_id; /**< Index of next descriptor in ring. */
75         uint16_t last_id; /**< Index of last scattered descriptor. */
76 };
77
78 /**
79  * RX queue flags
80  */
81 enum igb_rxq_flags {
82         IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
83 };
84
85 /**
86  * Structure associated with each RX queue.
87  */
88 struct igb_rx_queue {
89         struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
90         volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
91         uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
92         volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
93         volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
94         struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
95         struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
96         struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
97         uint16_t            nb_rx_desc; /**< number of RX descriptors. */
98         uint16_t            rx_tail;    /**< current value of RDT register. */
99         uint16_t            nb_rx_hold; /**< number of held free RX desc. */
100         uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
101         uint16_t            queue_id;   /**< RX queue index. */
102         uint16_t            reg_idx;    /**< RX queue register index. */
103         uint16_t            port_id;    /**< Device port identifier. */
104         uint8_t             pthresh;    /**< Prefetch threshold register. */
105         uint8_t             hthresh;    /**< Host threshold register. */
106         uint8_t             wthresh;    /**< Write-back threshold register. */
107         uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
108         uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
109         uint32_t            flags;      /**< RX flags. */
110         uint64_t            offloads;   /**< offloads of DEV_RX_OFFLOAD_* */
111 };
112
113 /**
114  * Hardware context number
115  */
116 enum igb_advctx_num {
117         IGB_CTX_0    = 0, /**< CTX0    */
118         IGB_CTX_1    = 1, /**< CTX1    */
119         IGB_CTX_NUM  = 2, /**< CTX_NUM */
120 };
121
122 /** Offload features */
123 union igb_tx_offload {
124         uint64_t data;
125         struct {
126                 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
127                 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
128                 uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier (CPU order). */
129                 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
130                 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
131
132                 /* uint64_t unused:8; */
133         };
134 };
135
136 /*
137  * Compare mask for igb_tx_offload.data;
138  * must be kept in sync with the igb_tx_offload layout.
139  */
140 #define TX_MACIP_LEN_CMP_MASK   0x000000000000FFFFULL /**< L2L3 header mask. */
141 #define TX_VLAN_CMP_MASK                0x00000000FFFF0000ULL /**< Vlan mask. */
142 #define TX_TCP_LEN_CMP_MASK             0x000000FF00000000ULL /**< TCP header mask. */
143 #define TX_TSO_MSS_CMP_MASK             0x00FFFF0000000000ULL /**< TSO segsz mask. */
144 /** Mac + IP + TCP + Mss mask. */
145 #define TX_TSO_CMP_MASK \
146         (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
147
148 /**
149  * Structure to check whether a new context descriptor needs to be built
150  */
151 struct igb_advctx_info {
152         uint64_t flags;           /**< ol_flags related to context build. */
153         /** tx offload: vlan, tso, l2-l3-l4 lengths. */
154         union igb_tx_offload tx_offload;
155         /** compare mask for tx offload. */
156         union igb_tx_offload tx_offload_mask;
157 };
158
159 /**
160  * Structure associated with each TX queue.
161  */
162 struct igb_tx_queue {
163         volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
164         uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
165         struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
166         volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
167         uint32_t               txd_type;      /**< Device-specific TXD type */
168         uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
169         uint16_t               tx_tail; /**< Current value of TDT register. */
170         uint16_t               tx_head;
171         /**< Index of first used TX descriptor. */
172         uint16_t               queue_id; /**< TX queue index. */
173         uint16_t               reg_idx;  /**< TX queue register index. */
174         uint16_t               port_id;  /**< Device port identifier. */
175         uint8_t                pthresh;  /**< Prefetch threshold register. */
176         uint8_t                hthresh;  /**< Host threshold register. */
177         uint8_t                wthresh;  /**< Write-back threshold register. */
178         uint32_t               ctx_curr;
179         /**< Index of the hardware context currently in use. */
180         uint32_t               ctx_start;
181         /**< Start context position for transmit queue. */
182         struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
183         /**< Hardware context history.*/
184 };
185
186 #if 1
187 #define RTE_PMD_USE_PREFETCH
188 #endif
189
190 #ifdef RTE_PMD_USE_PREFETCH
191 #define rte_igb_prefetch(p)     rte_prefetch0(p)
192 #else
193 #define rte_igb_prefetch(p)     do {} while(0)
194 #endif
195
196 #ifdef RTE_PMD_PACKET_PREFETCH
197 #define rte_packet_prefetch(p) rte_prefetch1(p)
198 #else
199 #define rte_packet_prefetch(p)  do {} while(0)
200 #endif
201
202 /*
203  * Macro for VMDq feature for 1 GbE NIC.
204  */
205 #define E1000_VMOLR_SIZE                        (8)
206 #define IGB_TSO_MAX_HDRLEN                      (512)
207 #define IGB_TSO_MAX_MSS                         (9216)
208
209 /*********************************************************************
210  *
211  *  TX function
212  *
213  **********************************************************************/
214
215 /*
216  * There are some limitations in hardware for TCP segmentation offload.
217  * We should check whether the parameters are valid.
218  */
219 static inline uint64_t
220 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
221 {
222         if (!(ol_req & PKT_TX_TCP_SEG))
223                 return ol_req;
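        /*
         * If the requested MSS or the total header length exceeds what the
         * hardware supports, fall back from TSO to plain TCP checksum offload.
         */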
224         if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
225                         ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
226                 ol_req &= ~PKT_TX_TCP_SEG;
227                 ol_req |= PKT_TX_TCP_CKSUM;
228         }
229         return ol_req;
230 }
231
232 /*
233  * Advanced context descriptors are almost the same between igb and ixgbe.
234  * This is a separate function; there may be an optimization opportunity here.
235  * Rework is required to go with the pre-defined values.
236  */
237
238 static inline void
239 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
240                 volatile struct e1000_adv_tx_context_desc *ctx_txd,
241                 uint64_t ol_flags, union igb_tx_offload tx_offload)
242 {
243         uint32_t type_tucmd_mlhl;
244         uint32_t mss_l4len_idx;
245         uint32_t ctx_idx, ctx_curr;
246         uint32_t vlan_macip_lens;
247         union igb_tx_offload tx_offload_mask;
248
249         ctx_curr = txq->ctx_curr;
250         ctx_idx = ctx_curr + txq->ctx_start;
251
252         tx_offload_mask.data = 0;
253         type_tucmd_mlhl = 0;
254
255         /* Specify which HW CTX to upload. */
256         mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
257
258         if (ol_flags & PKT_TX_VLAN_PKT)
259                 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
260
261                 /* check if TCP segmentation is required for this packet */
262         if (ol_flags & PKT_TX_TCP_SEG) {
263                 /* implies IP cksum in IPv4 */
264                 if (ol_flags & PKT_TX_IP_CKSUM)
265                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
266                                 E1000_ADVTXD_TUCMD_L4T_TCP |
267                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
268                 else
269                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
270                                 E1000_ADVTXD_TUCMD_L4T_TCP |
271                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
272
273                 tx_offload_mask.data |= TX_TSO_CMP_MASK;
274                 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
275                 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
276         } else { /* no TSO, check if hardware checksum is needed */
277                 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
278                         tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
279
280                 if (ol_flags & PKT_TX_IP_CKSUM)
281                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
282
283                 switch (ol_flags & PKT_TX_L4_MASK) {
284                 case PKT_TX_UDP_CKSUM:
285                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
286                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
287                         mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
288                         break;
289                 case PKT_TX_TCP_CKSUM:
290                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
291                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
292                         mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
293                         break;
294                 case PKT_TX_SCTP_CKSUM:
295                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
296                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
297                         mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
298                         break;
299                 default:
300                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
301                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
302                         break;
303                 }
304         }
305
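        /*
         * Remember what was programmed into this context slot so that
         * what_advctx_update() can detect when a later packet may reuse it.
         */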
306         txq->ctx_cache[ctx_curr].flags = ol_flags;
307         txq->ctx_cache[ctx_curr].tx_offload.data =
308                 tx_offload_mask.data & tx_offload.data;
309         txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
310
311         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
312         vlan_macip_lens = (uint32_t)tx_offload.data;
313         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
314         ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
315         ctx_txd->seqnum_seed = 0;
316 }
317
318 /*
319  * Check which hardware context can be used. Use the existing match
320  * or create a new context descriptor.
321  */
322 static inline uint32_t
323 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
324                 union igb_tx_offload tx_offload)
325 {
326         /* If it matches the currently active context */
327         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
328                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
329                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
330                         return txq->ctx_curr;
331         }
332
333         /* If it matches the other (second) context */
334         txq->ctx_curr ^= 1;
335         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
336                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
337                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
338                         return txq->ctx_curr;
339         }
340
341         /* Mismatch: the caller must build a new context descriptor */
342         return IGB_CTX_NUM;
343 }
344
345 static inline uint32_t
346 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
347 {
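        /*
         * 0/1 lookup tables selecting the TXSM (L4 checksum) and IXSM
         * (IP checksum) insertion bits of olinfo_status from ol_flags.
         */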
348         static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
349         static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
350         uint32_t tmp;
351
352         tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
353         tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
354         tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
355         return tmp;
356 }
357
358 static inline uint32_t
359 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
360 {
361         uint32_t cmdtype;
362         static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
363         static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
364         cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
365         cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
366         return cmdtype;
367 }
368
369 uint16_t
370 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
371                uint16_t nb_pkts)
372 {
373         struct igb_tx_queue *txq;
374         struct igb_tx_entry *sw_ring;
375         struct igb_tx_entry *txe, *txn;
376         volatile union e1000_adv_tx_desc *txr;
377         volatile union e1000_adv_tx_desc *txd;
378         struct rte_mbuf     *tx_pkt;
379         struct rte_mbuf     *m_seg;
380         uint64_t buf_dma_addr;
381         uint32_t olinfo_status;
382         uint32_t cmd_type_len;
383         uint32_t pkt_len;
384         uint16_t slen;
385         uint64_t ol_flags;
386         uint16_t tx_end;
387         uint16_t tx_id;
388         uint16_t tx_last;
389         uint16_t nb_tx;
390         uint64_t tx_ol_req;
391         uint32_t new_ctx = 0;
392         uint32_t ctx = 0;
393         union igb_tx_offload tx_offload = {0};
394
395         txq = tx_queue;
396         sw_ring = txq->sw_ring;
397         txr     = txq->tx_ring;
398         tx_id   = txq->tx_tail;
399         txe = &sw_ring[tx_id];
400
401         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
402                 tx_pkt = *tx_pkts++;
403                 pkt_len = tx_pkt->pkt_len;
404
405                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
406
407                 /*
408                  * The number of descriptors that must be allocated for a
409                  * packet is the number of segments of that packet, plus 1
410                  * Context Descriptor for the VLAN Tag Identifier, if any.
411                  * Determine the last TX descriptor to allocate in the TX ring
412                  * for the packet, starting from the current position (tx_id)
413                  * in the ring.
414                  */
415                 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
416
417                 ol_flags = tx_pkt->ol_flags;
418                 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
419
420                 /* If a Context Descriptor needs to be built. */
421                 if (tx_ol_req) {
422                         tx_offload.l2_len = tx_pkt->l2_len;
423                         tx_offload.l3_len = tx_pkt->l3_len;
424                         tx_offload.l4_len = tx_pkt->l4_len;
425                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
426                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
427                         tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
428
429                         ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
430                         /* Only allocate a context descriptor if required */
431                         new_ctx = (ctx == IGB_CTX_NUM);
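                        /*
                         * Translate the cached context slot into the hardware
                         * context index carried in the data descriptor.
                         */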
432                         ctx = txq->ctx_curr + txq->ctx_start;
433                         tx_last = (uint16_t) (tx_last + new_ctx);
434                 }
435                 if (tx_last >= txq->nb_tx_desc)
436                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
437
438                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
439                            " tx_first=%u tx_last=%u",
440                            (unsigned) txq->port_id,
441                            (unsigned) txq->queue_id,
442                            (unsigned) pkt_len,
443                            (unsigned) tx_id,
444                            (unsigned) tx_last);
445
446                 /*
447                  * Check if there are enough free descriptors in the TX ring
448                  * to transmit the next packet.
449                  * This operation is based on the two following rules:
450                  *
451                  *   1- Only check that the last needed TX descriptor can be
452                  *      allocated (by construction, if that descriptor is free,
453                  *      all intermediate ones are also free).
454                  *
455                  *      For this purpose, the index of the last TX descriptor
456                  *      used for a packet (the "last descriptor" of a packet)
457                  *      is recorded in the TX entries (the last one included)
458                  *      that are associated with all TX descriptors allocated
459                  *      for that packet.
460                  *
461                  *   2- Avoid allocating the last free TX descriptor of the
462                  *      ring, in order to never set the TDT register with the
463                  *      same value stored in parallel by the NIC in the TDH
464                  *      register, which makes the TX engine of the NIC enter
465                  *      a deadlock situation.
466                  *
467                  *      By extension, avoid allocating a free descriptor that
468                  *      belongs to the last set of free descriptors allocated
469                  *      to the same packet previously transmitted.
470                  */
471
472                 /*
473                  * The "last descriptor" of the previously sent packet, if any,
474                  * that used the last descriptor we now want to allocate.
475                  */
476                 tx_end = sw_ring[tx_last].last_id;
477
478                 /*
479                  * The next descriptor following that "last descriptor" in the
480                  * ring.
481                  */
482                 tx_end = sw_ring[tx_end].next_id;
483
484                 /*
485                  * The "last descriptor" associated with that next descriptor.
486                  */
487                 tx_end = sw_ring[tx_end].last_id;
488
489                 /*
490                  * Check that this descriptor is free.
491                  */
492                 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
493                         if (nb_tx == 0)
494                                 return 0;
495                         goto end_of_tx;
496                 }
497
498                 /*
499                  * Set common flags of all TX Data Descriptors.
500                  *
501                  * The following bits must be set in all Data Descriptors:
502                  *   - E1000_ADVTXD_DTYP_DATA
503                  *   - E1000_ADVTXD_DCMD_DEXT
504                  *
505                  * The following bits must be set in the first Data Descriptor
506                  * and are ignored in the other ones:
507                  *   - E1000_ADVTXD_DCMD_IFCS
508                  *   - E1000_ADVTXD_MAC_1588
509                  *   - E1000_ADVTXD_DCMD_VLE
510                  *
511                  * The following bits must only be set in the last Data
512                  * Descriptor:
513                  *   - E1000_TXD_CMD_EOP
514                  *
515                  * The following bits can be set in any Data Descriptor, but
516                  * are only set in the last Data Descriptor:
517                  *   - E1000_TXD_CMD_RS
518                  */
519                 cmd_type_len = txq->txd_type |
520                         E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
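                /*
                 * For TSO, the PAYLEN field must carry only the payload size,
                 * so subtract the L2/L3/L4 header lengths from pkt_len.
                 */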
521                 if (tx_ol_req & PKT_TX_TCP_SEG)
522                         pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
523                 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
524 #if defined(RTE_LIBRTE_IEEE1588)
525                 if (ol_flags & PKT_TX_IEEE1588_TMST)
526                         cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
527 #endif
528                 if (tx_ol_req) {
529                         /* Setup TX Advanced context descriptor if required */
530                         if (new_ctx) {
531                                 volatile struct e1000_adv_tx_context_desc *
532                                     ctx_txd;
533
534                                 ctx_txd = (volatile struct
535                                     e1000_adv_tx_context_desc *)
536                                     &txr[tx_id];
537
538                                 txn = &sw_ring[txe->next_id];
539                                 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
540
541                                 if (txe->mbuf != NULL) {
542                                         rte_pktmbuf_free_seg(txe->mbuf);
543                                         txe->mbuf = NULL;
544                                 }
545
546                                 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
547
548                                 txe->last_id = tx_last;
549                                 tx_id = txe->next_id;
550                                 txe = txn;
551                         }
552
553                         /* Setup the TX Advanced Data Descriptor */
554                         cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
555                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
556                         olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
557                 }
558
559                 m_seg = tx_pkt;
560                 do {
561                         txn = &sw_ring[txe->next_id];
562                         txd = &txr[tx_id];
563
564                         if (txe->mbuf != NULL)
565                                 rte_pktmbuf_free_seg(txe->mbuf);
566                         txe->mbuf = m_seg;
567
568                         /*
569                          * Set up transmit descriptor.
570                          */
571                         slen = (uint16_t) m_seg->data_len;
572                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
573                         txd->read.buffer_addr =
574                                 rte_cpu_to_le_64(buf_dma_addr);
575                         txd->read.cmd_type_len =
576                                 rte_cpu_to_le_32(cmd_type_len | slen);
577                         txd->read.olinfo_status =
578                                 rte_cpu_to_le_32(olinfo_status);
579                         txe->last_id = tx_last;
580                         tx_id = txe->next_id;
581                         txe = txn;
582                         m_seg = m_seg->next;
583                 } while (m_seg != NULL);
584
585                 /*
586                  * The last packet data descriptor needs End Of Packet (EOP)
587                  * and Report Status (RS).
588                  */
589                 txd->read.cmd_type_len |=
590                         rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
591         }
592  end_of_tx:
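        /*
         * Make sure all descriptor writes are globally visible before the
         * tail register is updated below.
         */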
593         rte_wmb();
594
595         /*
596          * Set the Transmit Descriptor Tail (TDT).
597          */
598         E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
599         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
600                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
601                    (unsigned) tx_id, (unsigned) nb_tx);
602         txq->tx_tail = tx_id;
603
604         return nb_tx;
605 }
606
607 /*********************************************************************
608  *
609  *  TX prep functions
610  *
611  **********************************************************************/
612 uint16_t
613 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
614                 uint16_t nb_pkts)
615 {
616         int i, ret;
617         struct rte_mbuf *m;
618
619         for (i = 0; i < nb_pkts; i++) {
620                 m = tx_pkts[i];
621
622                 /* Check some limitations for TSO in hardware */
623                 if (m->ol_flags & PKT_TX_TCP_SEG)
624                         if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
625                                         (m->l2_len + m->l3_len + m->l4_len >
626                                         IGB_TSO_MAX_HDRLEN)) {
627                                 rte_errno = EINVAL;
628                                 return i;
629                         }
630
631                 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
632                         rte_errno = ENOTSUP;
633                         return i;
634                 }
635
636 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
637                 ret = rte_validate_tx_offload(m);
638                 if (ret != 0) {
639                         rte_errno = -ret; /* ret holds a negative errno */
640                         return i;
641                 }
642 #endif
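                /*
                 * Fill in the pseudo-header checksum required by the hardware
                 * for L4 checksum and TSO offloads.
                 */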
643                 ret = rte_net_intel_cksum_prepare(m);
644                 if (ret != 0) {
645                         rte_errno = -ret; /* ret holds a negative errno */
646                         return i;
647                 }
648         }
649
650         return i;
651 }
652
653 /*********************************************************************
654  *
655  *  RX functions
656  *
657  **********************************************************************/
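/*
 * Packet-type values reported in the pkt_info field of the advanced RX
 * descriptor; translated to RTE_PTYPE_* by igb_rxd_pkt_info_to_pkt_type().
 */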
658 #define IGB_PACKET_TYPE_IPV4              0X01
659 #define IGB_PACKET_TYPE_IPV4_TCP          0X11
660 #define IGB_PACKET_TYPE_IPV4_UDP          0X21
661 #define IGB_PACKET_TYPE_IPV4_SCTP         0X41
662 #define IGB_PACKET_TYPE_IPV4_EXT          0X03
663 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP     0X43
664 #define IGB_PACKET_TYPE_IPV6              0X04
665 #define IGB_PACKET_TYPE_IPV6_TCP          0X14
666 #define IGB_PACKET_TYPE_IPV6_UDP          0X24
667 #define IGB_PACKET_TYPE_IPV6_EXT          0X0C
668 #define IGB_PACKET_TYPE_IPV6_EXT_TCP      0X1C
669 #define IGB_PACKET_TYPE_IPV6_EXT_UDP      0X2C
670 #define IGB_PACKET_TYPE_IPV4_IPV6         0X05
671 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP     0X15
672 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP     0X25
673 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
674 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
675 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
676 #define IGB_PACKET_TYPE_MAX               0X80
677 #define IGB_PACKET_TYPE_MASK              0X7F
678 #define IGB_PACKET_TYPE_SHIFT             0X04
679 static inline uint32_t
680 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
681 {
682         static const uint32_t
683                 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
684                 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
685                         RTE_PTYPE_L3_IPV4,
686                 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
687                         RTE_PTYPE_L3_IPV4_EXT,
688                 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
689                         RTE_PTYPE_L3_IPV6,
690                 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
691                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
692                         RTE_PTYPE_INNER_L3_IPV6,
693                 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
694                         RTE_PTYPE_L3_IPV6_EXT,
695                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
696                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
697                         RTE_PTYPE_INNER_L3_IPV6_EXT,
698                 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
699                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
700                 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
701                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
702                 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
703                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
704                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
705                 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
706                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
707                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
708                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
709                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
710                 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
711                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
712                 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
713                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
714                 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
715                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
716                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
717                 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
718                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
719                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
720                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
721                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
722                 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
723                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
724                 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
725                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
726         };
727         if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
728                 return RTE_PTYPE_UNKNOWN;
729
730         pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
731
732         return ptype_table[pkt_info];
733 }
734
735 static inline uint64_t
736 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
737 {
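        /*
         * The low nibble of hlen_type_rss is the RSS type; a value of zero
         * means no RSS hash was computed for this packet.
         */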
738         uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ?  0 : PKT_RX_RSS_HASH;
739
740 #if defined(RTE_LIBRTE_IEEE1588)
741         static uint32_t ip_pkt_etqf_map[8] = {
742                 0, 0, 0, PKT_RX_IEEE1588_PTP,
743                 0, 0, 0, 0,
744         };
745
746         struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
747         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
748
749         /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
750         if (hw->mac.type == e1000_i210)
751                 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
752         else
753                 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
754 #else
755         RTE_SET_USED(rxq);
756 #endif
757
758         return pkt_flags;
759 }
760
761 static inline uint64_t
762 rx_desc_status_to_pkt_flags(uint32_t rx_status)
763 {
764         uint64_t pkt_flags;
765
766         /* Check if VLAN present */
767         pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
768                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
769
770 #if defined(RTE_LIBRTE_IEEE1588)
771         if (rx_status & E1000_RXD_STAT_TMST)
772                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
773 #endif
774         return pkt_flags;
775 }
776
777 static inline uint64_t
778 rx_desc_error_to_pkt_flags(uint32_t rx_status)
779 {
780         /*
781          * Bit 30: IPE, IPv4 checksum error
782          * Bit 29: L4I, L4I integrity error
783          */
784
785         static uint64_t error_to_pkt_flags_map[4] = {
786                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
787                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
788                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
789                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
790         };
791         return error_to_pkt_flags_map[(rx_status >>
792                 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
793 }
794
795 uint16_t
796 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
797                uint16_t nb_pkts)
798 {
799         struct igb_rx_queue *rxq;
800         volatile union e1000_adv_rx_desc *rx_ring;
801         volatile union e1000_adv_rx_desc *rxdp;
802         struct igb_rx_entry *sw_ring;
803         struct igb_rx_entry *rxe;
804         struct rte_mbuf *rxm;
805         struct rte_mbuf *nmb;
806         union e1000_adv_rx_desc rxd;
807         uint64_t dma_addr;
808         uint32_t staterr;
809         uint32_t hlen_type_rss;
810         uint16_t pkt_len;
811         uint16_t rx_id;
812         uint16_t nb_rx;
813         uint16_t nb_hold;
814         uint64_t pkt_flags;
815
816         nb_rx = 0;
817         nb_hold = 0;
818         rxq = rx_queue;
819         rx_id = rxq->rx_tail;
820         rx_ring = rxq->rx_ring;
821         sw_ring = rxq->sw_ring;
822         while (nb_rx < nb_pkts) {
823                 /*
824                  * The order of operations here is important as the DD status
825                  * bit must not be read after any other descriptor fields.
826                  * rx_ring and rxdp are pointing to volatile data so the order
827                  * of accesses cannot be reordered by the compiler. If they were
828                  * not volatile, they could be reordered which could lead to
829                  * using invalid descriptor fields when read from rxd.
830                  */
831                 rxdp = &rx_ring[rx_id];
832                 staterr = rxdp->wb.upper.status_error;
833                 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
834                         break;
835                 rxd = *rxdp;
836
837                 /*
838                  * End of packet.
839                  *
840                  * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
841                  * likely to be invalid and to be dropped by the various
842                  * validation checks performed by the network stack.
843                  *
844                  * Allocate a new mbuf to replenish the RX ring descriptor.
845                  * If the allocation fails:
846                  *    - arrange for that RX descriptor to be the first one
847                  *      being parsed the next time the receive function is
848                  *      invoked [on the same queue].
849                  *
850                  *    - Stop parsing the RX ring and return immediately.
851                  *
852                  * This policy does not drop the packet received in the RX
853                  * descriptor for which the allocation of a new mbuf failed.
854                  * Thus, it allows that packet to be later retrieved if
855                  * mbufs have been freed in the meantime.
856                  * As a side effect, holding RX descriptors instead of
857                  * systematically giving them back to the NIC may lead to
858                  * RX ring exhaustion situations.
859                  * However, the NIC can gracefully prevent such situations
860                  * from happening by sending specific "back-pressure" flow control
861                  * frames to its peer(s).
862                  */
863                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
864                            "staterr=0x%x pkt_len=%u",
865                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
866                            (unsigned) rx_id, (unsigned) staterr,
867                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
868
869                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
870                 if (nmb == NULL) {
871                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
872                                    "queue_id=%u", (unsigned) rxq->port_id,
873                                    (unsigned) rxq->queue_id);
874                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
875                         break;
876                 }
877
878                 nb_hold++;
879                 rxe = &sw_ring[rx_id];
880                 rx_id++;
881                 if (rx_id == rxq->nb_rx_desc)
882                         rx_id = 0;
883
884                 /* Prefetch next mbuf while processing current one. */
885                 rte_igb_prefetch(sw_ring[rx_id].mbuf);
886
887                 /*
888                  * When next RX descriptor is on a cache-line boundary,
889                  * prefetch the next 4 RX descriptors and the next 8 pointers
890                  * to mbufs.
891                  */
892                 if ((rx_id & 0x3) == 0) {
893                         rte_igb_prefetch(&rx_ring[rx_id]);
894                         rte_igb_prefetch(&sw_ring[rx_id]);
895                 }
896
897                 rxm = rxe->mbuf;
898                 rxe->mbuf = nmb;
899                 dma_addr =
900                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
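                /*
                 * Rearm the descriptor with the new buffer. Clearing hdr_addr
                 * also clears the DD bit, which shares those bytes with the
                 * write-back status field in the descriptor union.
                 */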
901                 rxdp->read.hdr_addr = 0;
902                 rxdp->read.pkt_addr = dma_addr;
903
904                 /*
905                  * Initialize the returned mbuf.
906                  * 1) setup generic mbuf fields:
907                  *    - number of segments,
908                  *    - next segment,
909                  *    - packet length,
910                  *    - RX port identifier.
911                  * 2) integrate hardware offload data, if any:
912                  *    - RSS flag & hash,
913                  *    - IP checksum flag,
914                  *    - VLAN TCI, if any,
915                  *    - error flags.
916                  */
917                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
918                                       rxq->crc_len);
919                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
920                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
921                 rxm->nb_segs = 1;
922                 rxm->next = NULL;
923                 rxm->pkt_len = pkt_len;
924                 rxm->data_len = pkt_len;
925                 rxm->port = rxq->port_id;
926
927                 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
928                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
929
930                 /*
931                  * The vlan_tci field is only valid when PKT_RX_VLAN is
932                  * set in the pkt_flags field and must be in CPU byte order.
933                  */
934                 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
935                                 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
936                         rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
937                 } else {
938                         rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
939                 }
940                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
941                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
942                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
943                 rxm->ol_flags = pkt_flags;
944                 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
945                                                 lo_dword.hs_rss.pkt_info);
946
947                 /*
948                  * Store the mbuf address into the next entry of the array
949                  * of returned packets.
950                  */
951                 rx_pkts[nb_rx++] = rxm;
952         }
953         rxq->rx_tail = rx_id;
954
955         /*
956          * If the number of free RX descriptors is greater than the RX free
957          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
958          * register.
959          * Update the RDT with the value of the last processed RX descriptor
960          * minus 1, to guarantee that the RDT register is never equal to the
961          * RDH register, which creates a "full" ring situation from the
962          * hardware point of view...
963          */
964         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
965         if (nb_hold > rxq->rx_free_thresh) {
966                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
967                            "nb_hold=%u nb_rx=%u",
968                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
969                            (unsigned) rx_id, (unsigned) nb_hold,
970                            (unsigned) nb_rx);
971                 rx_id = (uint16_t) ((rx_id == 0) ?
972                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
973                 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
974                 nb_hold = 0;
975         }
976         rxq->nb_rx_hold = nb_hold;
977         return nb_rx;
978 }
979
980 uint16_t
981 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
982                          uint16_t nb_pkts)
983 {
984         struct igb_rx_queue *rxq;
985         volatile union e1000_adv_rx_desc *rx_ring;
986         volatile union e1000_adv_rx_desc *rxdp;
987         struct igb_rx_entry *sw_ring;
988         struct igb_rx_entry *rxe;
989         struct rte_mbuf *first_seg;
990         struct rte_mbuf *last_seg;
991         struct rte_mbuf *rxm;
992         struct rte_mbuf *nmb;
993         union e1000_adv_rx_desc rxd;
994         uint64_t dma; /* Physical address of mbuf data buffer */
995         uint32_t staterr;
996         uint32_t hlen_type_rss;
997         uint16_t rx_id;
998         uint16_t nb_rx;
999         uint16_t nb_hold;
1000         uint16_t data_len;
1001         uint64_t pkt_flags;
1002
1003         nb_rx = 0;
1004         nb_hold = 0;
1005         rxq = rx_queue;
1006         rx_id = rxq->rx_tail;
1007         rx_ring = rxq->rx_ring;
1008         sw_ring = rxq->sw_ring;
1009
1010         /*
1011          * Retrieve RX context of current packet, if any.
1012          */
1013         first_seg = rxq->pkt_first_seg;
1014         last_seg = rxq->pkt_last_seg;
1015
1016         while (nb_rx < nb_pkts) {
1017         next_desc:
1018                 /*
1019                  * The order of operations here is important as the DD status
1020                  * bit must not be read after any other descriptor fields.
1021                  * rx_ring and rxdp are pointing to volatile data so the order
1022                  * of accesses cannot be reordered by the compiler. If they were
1023                  * not volatile, they could be reordered which could lead to
1024                  * using invalid descriptor fields when read from rxd.
1025                  */
1026                 rxdp = &rx_ring[rx_id];
1027                 staterr = rxdp->wb.upper.status_error;
1028                 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1029                         break;
1030                 rxd = *rxdp;
1031
1032                 /*
1033                  * Descriptor done.
1034                  *
1035                  * Allocate a new mbuf to replenish the RX ring descriptor.
1036                  * If the allocation fails:
1037                  *    - arrange for that RX descriptor to be the first one
1038                  *      being parsed the next time the receive function is
1039                  *      invoked [on the same queue].
1040                  *
1041                  *    - Stop parsing the RX ring and return immediately.
1042                  *
1043                  * This policy does not drop the packet received in the RX
1044                  * descriptor for which the allocation of a new mbuf failed.
1045                  * Thus, it allows that packet to be later retrieved if
1046                  * mbufs have been freed in the meantime.
1047                  * As a side effect, holding RX descriptors instead of
1048                  * systematically giving them back to the NIC may lead to
1049                  * RX ring exhaustion situations.
1050                  * However, the NIC can gracefully prevent such situations
1051                  * from happening by sending specific "back-pressure" flow control
1052                  * frames to its peer(s).
1053                  */
1054                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1055                            "staterr=0x%x data_len=%u",
1056                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1057                            (unsigned) rx_id, (unsigned) staterr,
1058                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1059
1060                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1061                 if (nmb == NULL) {
1062                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1063                                    "queue_id=%u", (unsigned) rxq->port_id,
1064                                    (unsigned) rxq->queue_id);
1065                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1066                         break;
1067                 }
1068
1069                 nb_hold++;
1070                 rxe = &sw_ring[rx_id];
1071                 rx_id++;
1072                 if (rx_id == rxq->nb_rx_desc)
1073                         rx_id = 0;
1074
1075                 /* Prefetch next mbuf while processing current one. */
1076                 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1077
1078                 /*
1079                  * When next RX descriptor is on a cache-line boundary,
1080                  * prefetch the next 4 RX descriptors and the next 8 pointers
1081                  * to mbufs.
1082                  */
1083                 if ((rx_id & 0x3) == 0) {
1084                         rte_igb_prefetch(&rx_ring[rx_id]);
1085                         rte_igb_prefetch(&sw_ring[rx_id]);
1086                 }
1087
1088                 /*
1089                  * Update RX descriptor with the physical address of the new
1090                  * data buffer of the new allocated mbuf.
1091                  */
1092                 rxm = rxe->mbuf;
1093                 rxe->mbuf = nmb;
1094                 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1095                 rxdp->read.pkt_addr = dma;
1096                 rxdp->read.hdr_addr = 0;
1097
1098                 /*
1099                  * Set data length & data buffer address of mbuf.
1100                  */
1101                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1102                 rxm->data_len = data_len;
1103                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1104
1105                 /*
1106                  * If this is the first buffer of the received packet,
1107                  * set the pointer to the first mbuf of the packet and
1108                  * initialize its context.
1109                  * Otherwise, update the total length and the number of segments
1110                  * of the current scattered packet, and update the pointer to
1111                  * the last mbuf of the current packet.
1112                  */
1113                 if (first_seg == NULL) {
1114                         first_seg = rxm;
1115                         first_seg->pkt_len = data_len;
1116                         first_seg->nb_segs = 1;
1117                 } else {
1118                         first_seg->pkt_len += data_len;
1119                         first_seg->nb_segs++;
1120                         last_seg->next = rxm;
1121                 }
1122
1123                 /*
1124                  * If this is not the last buffer of the received packet,
1125                  * update the pointer to the last mbuf of the current scattered
1126                  * packet and continue to parse the RX ring.
1127                  */
1128                 if (! (staterr & E1000_RXD_STAT_EOP)) {
1129                         last_seg = rxm;
1130                         goto next_desc;
1131                 }
1132
1133                 /*
1134                  * This is the last buffer of the received packet.
1135                  * If the CRC is not stripped by the hardware:
1136                  *   - Subtract the CRC length from the total packet length.
1137                  *   - If the last buffer only contains the whole CRC or a part
1138                  *     of it, free the mbuf associated to the last buffer.
1139                  *     If part of the CRC is also contained in the previous
1140                  *     mbuf, subtract the length of that CRC part from the
1141                  *     data length of the previous mbuf.
1142                  */
1143                 rxm->next = NULL;
1144                 if (unlikely(rxq->crc_len > 0)) {
1145                         first_seg->pkt_len -= ETHER_CRC_LEN;
1146                         if (data_len <= ETHER_CRC_LEN) {
1147                                 rte_pktmbuf_free_seg(rxm);
1148                                 first_seg->nb_segs--;
1149                                 last_seg->data_len = (uint16_t)
1150                                         (last_seg->data_len -
1151                                          (ETHER_CRC_LEN - data_len));
1152                                 last_seg->next = NULL;
1153                         } else
1154                                 rxm->data_len =
1155                                         (uint16_t) (data_len - ETHER_CRC_LEN);
1156                 }
1157
1158                 /*
1159                  * Initialize the first mbuf of the returned packet:
1160                  *    - RX port identifier,
1161                  *    - hardware offload data, if any:
1162                  *      - RSS flag & hash,
1163                  *      - IP checksum flag,
1164                  *      - VLAN TCI, if any,
1165                  *      - error flags.
1166                  */
1167                 first_seg->port = rxq->port_id;
1168                 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1169
1170                 /*
1171                  * The vlan_tci field is only valid when PKT_RX_VLAN is
1172                  * set in the pkt_flags field and must be in CPU byte order.
1173                  */
1174                 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1175                                 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1176                         first_seg->vlan_tci =
1177                                 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1178                 } else {
1179                         first_seg->vlan_tci =
1180                                 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1181                 }
1182                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1183                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1184                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1185                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1186                 first_seg->ol_flags = pkt_flags;
1187                 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1188                                         lower.lo_dword.hs_rss.pkt_info);
1189
1190                 /* Prefetch data of first segment, if configured to do so. */
1191                 rte_packet_prefetch((char *)first_seg->buf_addr +
1192                         first_seg->data_off);
1193
1194                 /*
1195                  * Store the mbuf address into the next entry of the array
1196                  * of returned packets.
1197                  */
1198                 rx_pkts[nb_rx++] = first_seg;
1199
1200                 /*
1201                  * Setup receipt context for a new packet.
1202                  */
1203                 first_seg = NULL;
1204         }
1205
1206         /*
1207          * Record index of the next RX descriptor to probe.
1208          */
1209         rxq->rx_tail = rx_id;
1210
1211         /*
1212          * Save receive context.
1213          */
1214         rxq->pkt_first_seg = first_seg;
1215         rxq->pkt_last_seg = last_seg;
1216
1217         /*
1218          * If the number of free RX descriptors is greater than the RX free
1219          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1220          * register.
1221          * Update the RDT with the value of the last processed RX descriptor
1222          * minus 1, to guarantee that the RDT register is never equal to the
1223          * RDH register, which creates a "full" ring situation from the
1224          * hardware point of view...
1225          */
1226         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1227         if (nb_hold > rxq->rx_free_thresh) {
1228                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1229                            "nb_hold=%u nb_rx=%u",
1230                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1231                            (unsigned) rx_id, (unsigned) nb_hold,
1232                            (unsigned) nb_rx);
1233                 rx_id = (uint16_t) ((rx_id == 0) ?
1234                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1235                 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1236                 nb_hold = 0;
1237         }
1238         rxq->nb_rx_hold = nb_hold;
1239         return nb_rx;
1240 }
1241
1242 /*
1243  * Maximum number of Ring Descriptors.
1244  *
1245  * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1246  * descriptors should meet the following condition:
1247  *      (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1248  */
1249
1250 static void
1251 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1252 {
1253         unsigned i;
1254
1255         if (txq->sw_ring != NULL) {
1256                 for (i = 0; i < txq->nb_tx_desc; i++) {
1257                         if (txq->sw_ring[i].mbuf != NULL) {
1258                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1259                                 txq->sw_ring[i].mbuf = NULL;
1260                         }
1261                 }
1262         }
1263 }
1264
1265 static void
1266 igb_tx_queue_release(struct igb_tx_queue *txq)
1267 {
1268         if (txq != NULL) {
1269                 igb_tx_queue_release_mbufs(txq);
1270                 rte_free(txq->sw_ring);
1271                 rte_free(txq);
1272         }
1273 }
1274
1275 void
1276 eth_igb_tx_queue_release(void *txq)
1277 {
1278         igb_tx_queue_release(txq);
1279 }
1280
1281 static int
1282 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1283 {
1284         struct igb_tx_entry *sw_ring;
1285         volatile union e1000_adv_tx_desc *txr;
1286         uint16_t tx_first; /* First segment analyzed. */
1287         uint16_t tx_id;    /* Current segment being processed. */
1288         uint16_t tx_last;  /* Last segment in the current packet. */
1289         uint16_t tx_next;  /* First segment of the next packet. */
1290         int count;
1291
1292         if (txq != NULL) {
1293                 count = 0;
1294                 sw_ring = txq->sw_ring;
1295                 txr = txq->tx_ring;
1296
1297                 /*
1298                  * tx_tail indexes the most recently queued packet in sw_ring.
1299                  * Go to the end of that packet (the last segment in its chain);
1300                  * the segment that follows it is the oldest segment in the
1301                  * sw_ring, i.e. the first packet that this function will
1302                  * attempt to free.
1303                  */
1304
1305                 /* Get last segment in most recently added packet. */
1306                 tx_first = sw_ring[txq->tx_tail].last_id;
1307
1308                 /* Get the next segment, which is the oldest segment in ring. */
1309                 tx_first = sw_ring[tx_first].next_id;
1310
1311                 /* Set the current index to the first. */
1312                 tx_id = tx_first;
1313
1314                 /*
1315                  * Loop through each packet: verify that an mbuf exists and that
1316                  * the last segment's descriptor has been written back (DD set).
1317                  * If so, free the packet and move on.
1318                  */
1319                 while (1) {
1320                         tx_last = sw_ring[tx_id].last_id;
1321
1322                         if (sw_ring[tx_last].mbuf) {
1323                                 if (txr[tx_last].wb.status &
1324                                                 E1000_TXD_STAT_DD) {
1325                                         /*
1326                                          * Increment the number of packets
1327                                          * freed.
1328                                          */
1329                                         count++;
1330
1331                                         /* Get the start of the next packet. */
1332                                         tx_next = sw_ring[tx_last].next_id;
1333
1334                                         /*
1335                                          * Loop through all segments in a
1336                                          * packet.
1337                                          */
1338                                         do {
1339                                                 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1340                                                 sw_ring[tx_id].mbuf = NULL;
1341                                                 sw_ring[tx_id].last_id = tx_id;
1342
1343                                                 /* Move to next segment. */
1344                                                 tx_id = sw_ring[tx_id].next_id;
1345
1346                                         } while (tx_id != tx_next);
1347
1348                                         if (unlikely(count == (int)free_cnt))
1349                                                 break;
1350                                 } else
1351                                         /*
1352                                          * mbuf still in use, nothing left to
1353                                          * free.
1354                                          */
1355                                         break;
1356                         } else {
1357                                 /*
1358                                  * There are multiple reasons to be here:
1359                                  * 1) All the packets on the ring have been
1360                                  *    freed - tx_id is equal to tx_first
1361                                  *    and some packets have been freed.
1362                                  *    - Done, exit
1363                                  * 2) The interface has not sent a ring's worth of
1364                                  *    packets yet, so the segment after tail is
1365                                  *    still empty. Or a previous call to this
1366                                  *    function freed some of the segments but
1367                                  *    not all, so there is a hole in the list.
1368                                  *    Hopefully this is a rare case.
1369                                  *    - Walk the list and find the next mbuf. If
1370                                  *      there isn't one, then done.
1371                                  */
1372                                 if (likely((tx_id == tx_first) && (count != 0)))
1373                                         break;
1374
1375                                 /*
1376                                  * Walk the list and find the next mbuf, if any.
1377                                  */
1378                                 do {
1379                                         /* Move to next segment. */
1380                                         tx_id = sw_ring[tx_id].next_id;
1381
1382                                         if (sw_ring[tx_id].mbuf)
1383                                                 break;
1384
1385                                 } while (tx_id != tx_first);
1386
1387                                 /*
1388                                  * Determine why the previous loop bailed: if there
1389                                  * is no mbuf, we are done.
1390                                  */
1391                                 if (sw_ring[tx_id].mbuf == NULL)
1392                                         break;
1393                         }
1394                 }
1395         } else
1396                 count = -ENODEV;
1397
1398         return count;
1399 }
1400
1401 int
1402 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1403 {
1404         return igb_tx_done_cleanup(txq, free_cnt);
1405 }
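/*
 * Usage sketch (application side, hypothetical port/queue/count values),
 * assuming the driver registers eth_igb_tx_done_cleanup() as its ethdev
 * tx_done_cleanup handler:
 *
 *     int freed = rte_eth_tx_done_cleanup(port_id, queue_id, 32);
 *
 *     if (freed < 0)
 *             rte_exit(EXIT_FAILURE, "tx_done_cleanup failed\n");
 *
 * A negative value reports an error such as -ENODEV; a non-negative value is
 * the number of packets whose mbufs were released back to their mempools.
 */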
1406
1407 static void
1408 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1409 {
1410         txq->tx_head = 0;
1411         txq->tx_tail = 0;
1412         txq->ctx_curr = 0;
1413         memset((void*)&txq->ctx_cache, 0,
1414                 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1415 }
1416
1417 static void
1418 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1419 {
1420         static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1421         struct igb_tx_entry *txe = txq->sw_ring;
1422         uint16_t i, prev;
1423         struct e1000_hw *hw;
1424
1425         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1426         /* Zero out HW ring memory */
1427         for (i = 0; i < txq->nb_tx_desc; i++) {
1428                 txq->tx_ring[i] = zeroed_desc;
1429         }
1430
1431         /* Initialize ring entries */
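        /*
         * Entries are chained into a circular list through next_id so that
         * the transmit path and igb_tx_done_cleanup() can walk packet
         * segment chains; setting the DD bit below marks every descriptor
         * as already completed for the first transmissions.
         */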
1432         prev = (uint16_t)(txq->nb_tx_desc - 1);
1433         for (i = 0; i < txq->nb_tx_desc; i++) {
1434                 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1435
1436                 txd->wb.status = E1000_TXD_STAT_DD;
1437                 txe[i].mbuf = NULL;
1438                 txe[i].last_id = i;
1439                 txe[prev].next_id = i;
1440                 prev = i;
1441         }
1442
1443         txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1444         /* 82575 specific, each tx queue will use 2 hw contexts */
1445         if (hw->mac.type == e1000_82575)
1446                 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1447
1448         igb_reset_tx_queue_stat(txq);
1449 }
1450
1451 int
1452 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1453                          uint16_t queue_idx,
1454                          uint16_t nb_desc,
1455                          unsigned int socket_id,
1456                          const struct rte_eth_txconf *tx_conf)
1457 {
1458         const struct rte_memzone *tz;
1459         struct igb_tx_queue *txq;
1460         struct e1000_hw     *hw;
1461         uint32_t size;
1462
1463         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1464
1465         /*
1466          * Validate number of transmit descriptors.
1467  * It must not exceed the hardware maximum and must be a multiple of
1468  * IGB_TXD_ALIGN, which keeps the ring size a multiple of E1000_ALIGN bytes.
1469          */
1470         if (nb_desc % IGB_TXD_ALIGN != 0 ||
1471                         (nb_desc > E1000_MAX_RING_DESC) ||
1472                         (nb_desc < E1000_MIN_RING_DESC)) {
1473                 return -EINVAL;
1474         }
1475
1476         /*
1477          * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1478          * driver.
1479          */
1480         if (tx_conf->tx_free_thresh != 0)
1481                 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1482                              "used for the 1G driver.");
1483         if (tx_conf->tx_rs_thresh != 0)
1484                 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1485                              "used for the 1G driver.");
1486         if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1487                 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1488                              "consider setting the TX WTHRESH value to 4, 8, "
1489                              "or 16.");
1490
1491         /* Free memory prior to re-allocation if needed */
1492         if (dev->data->tx_queues[queue_idx] != NULL) {
1493                 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1494                 dev->data->tx_queues[queue_idx] = NULL;
1495         }
1496
1497         /* First allocate the tx queue data structure */
1498         txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1499                                                         RTE_CACHE_LINE_SIZE);
1500         if (txq == NULL)
1501                 return -ENOMEM;
1502
1503         /*
1504          * Allocate TX ring hardware descriptors. A memzone large enough to
1505          * handle the maximum ring size is allocated in order to allow for
1506          * resizing in later calls to the queue setup function.
1507          */
1508         size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1509         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1510                                       E1000_ALIGN, socket_id);
1511         if (tz == NULL) {
1512                 igb_tx_queue_release(txq);
1513                 return -ENOMEM;
1514         }
1515
1516         txq->nb_tx_desc = nb_desc;
1517         txq->pthresh = tx_conf->tx_thresh.pthresh;
1518         txq->hthresh = tx_conf->tx_thresh.hthresh;
1519         txq->wthresh = tx_conf->tx_thresh.wthresh;
1520         if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1521                 txq->wthresh = 1;
1522         txq->queue_id = queue_idx;
1523         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1524                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1525         txq->port_id = dev->data->port_id;
1526
1527         txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1528         txq->tx_ring_phys_addr = tz->iova;
1529
1530         txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1531         /* Allocate software ring */
1532         txq->sw_ring = rte_zmalloc("txq->sw_ring",
1533                                    sizeof(struct igb_tx_entry) * nb_desc,
1534                                    RTE_CACHE_LINE_SIZE);
1535         if (txq->sw_ring == NULL) {
1536                 igb_tx_queue_release(txq);
1537                 return -ENOMEM;
1538         }
1539         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1540                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1541
1542         igb_reset_tx_queue(txq, dev);
1543         dev->tx_pkt_burst = eth_igb_xmit_pkts;
1544         dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1545         dev->data->tx_queues[queue_idx] = txq;
1546
1547         return 0;
1548 }
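/*
 * Usage sketch (application side, hypothetical values): this setup path is
 * normally reached through the generic ethdev API, e.g.
 *
 *     struct rte_eth_txconf txconf = dev_info.default_txconf;
 *
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  &txconf);
 *     if (ret < 0)
 *             rte_exit(EXIT_FAILURE, "tx queue setup failed\n");
 */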
1549
1550 static void
1551 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1552 {
1553         unsigned i;
1554
1555         if (rxq->sw_ring != NULL) {
1556                 for (i = 0; i < rxq->nb_rx_desc; i++) {
1557                         if (rxq->sw_ring[i].mbuf != NULL) {
1558                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1559                                 rxq->sw_ring[i].mbuf = NULL;
1560                         }
1561                 }
1562         }
1563 }
1564
1565 static void
1566 igb_rx_queue_release(struct igb_rx_queue *rxq)
1567 {
1568         if (rxq != NULL) {
1569                 igb_rx_queue_release_mbufs(rxq);
1570                 rte_free(rxq->sw_ring);
1571                 rte_free(rxq);
1572         }
1573 }
1574
1575 void
1576 eth_igb_rx_queue_release(void *rxq)
1577 {
1578         igb_rx_queue_release(rxq);
1579 }
1580
1581 static void
1582 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1583 {
1584         static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1585         unsigned i;
1586
1587         /* Zero out HW ring memory */
1588         for (i = 0; i < rxq->nb_rx_desc; i++) {
1589                 rxq->rx_ring[i] = zeroed_desc;
1590         }
1591
1592         rxq->rx_tail = 0;
1593         rxq->pkt_first_seg = NULL;
1594         rxq->pkt_last_seg = NULL;
1595 }
1596
1597 uint64_t
1598 igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1599 {
1600         uint64_t rx_offload_capa;
1601
1602         RTE_SET_USED(dev);
1603         rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP  |
1604                           DEV_RX_OFFLOAD_VLAN_FILTER |
1605                           DEV_RX_OFFLOAD_IPV4_CKSUM  |
1606                           DEV_RX_OFFLOAD_UDP_CKSUM   |
1607                           DEV_RX_OFFLOAD_TCP_CKSUM   |
1608                           DEV_RX_OFFLOAD_JUMBO_FRAME |
1609                           DEV_RX_OFFLOAD_CRC_STRIP   |
1610                           DEV_RX_OFFLOAD_SCATTER;
1611
1612         return rx_offload_capa;
1613 }
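/*
 * Usage sketch (application side, names assumed): this mask is typically
 * reported to applications as dev_info.rx_offload_capa by
 * rte_eth_dev_info_get(), so a capability can be tested before it is
 * requested at configure time:
 *
 *     struct rte_eth_dev_info dev_info;
 *
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
 *             port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
 */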
1614
1615 uint64_t
1616 igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1617 {
1618         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1619         uint64_t rx_queue_offload_capa;
1620
1621         switch (hw->mac.type) {
1622         case e1000_vfadapt_i350:
1623                 /*
1624                  * Since only one Rx queue can be used, report the per-queue
1625                  * offloading capability as identical to the per-port offloading
1626                  * capability for convenience.
1627                  */
1628                 rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1629                 break;
1630         default:
1631                 rx_queue_offload_capa = 0;
1632         }
1633         return rx_queue_offload_capa;
1634 }
1635
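/*
 * Check that the Rx offloads requested for a queue are supported and
 * consistent with the port-level configuration: every requested bit must be
 * either a queue-level or a port-level capability, and any port-level bit
 * must match what was set in rxmode.offloads at configure time. For example,
 * requesting DEV_RX_OFFLOAD_VLAN_STRIP on a single queue is rejected unless
 * it was also enabled for the whole port, because VLAN stripping is part of
 * the port-level capability set reported above.
 */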
1636 static int
1637 igb_check_rx_queue_offloads(struct rte_eth_dev *dev, uint64_t requested)
1638 {
1639         uint64_t port_offloads = dev->data->dev_conf.rxmode.offloads;
1640         uint64_t queue_supported = igb_get_rx_queue_offloads_capa(dev);
1641         uint64_t port_supported = igb_get_rx_port_offloads_capa(dev);
1642
1643         if ((requested & (queue_supported | port_supported)) != requested)
1644                 return 0;
1645
1646         if ((port_offloads ^ requested) & port_supported)
1647                 return 0;
1648
1649         return 1;
1650 }
1651
1652 int
1653 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1654                          uint16_t queue_idx,
1655                          uint16_t nb_desc,
1656                          unsigned int socket_id,
1657                          const struct rte_eth_rxconf *rx_conf,
1658                          struct rte_mempool *mp)
1659 {
1660         const struct rte_memzone *rz;
1661         struct igb_rx_queue *rxq;
1662         struct e1000_hw     *hw;
1663         unsigned int size;
1664
1665         if (!igb_check_rx_queue_offloads(dev, rx_conf->offloads)) {
1666                 PMD_INIT_LOG(ERR, "%p: Rx queue offloads 0x%" PRIx64
1667                         " don't match port offloads 0x%" PRIx64
1668                         " or supported port offloads 0x%" PRIx64
1669                         " or supported queue offloads 0x%" PRIx64,
1670                         (void *)dev,
1671                         rx_conf->offloads,
1672                         dev->data->dev_conf.rxmode.offloads,
1673                         igb_get_rx_port_offloads_capa(dev),
1674                         igb_get_rx_queue_offloads_capa(dev));
1675                 return -ENOTSUP;
1676         }
1677
1678         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1679
1680         /*
1681          * Validate number of receive descriptors.
1682  * It must not exceed the hardware maximum and must be a multiple of
1683  * IGB_RXD_ALIGN, which keeps the ring size a multiple of E1000_ALIGN bytes.
1684          */
1685         if (nb_desc % IGB_RXD_ALIGN != 0 ||
1686                         (nb_desc > E1000_MAX_RING_DESC) ||
1687                         (nb_desc < E1000_MIN_RING_DESC)) {
1688                 return -EINVAL;
1689         }
1690
1691         /* Free memory prior to re-allocation if needed */
1692         if (dev->data->rx_queues[queue_idx] != NULL) {
1693                 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1694                 dev->data->rx_queues[queue_idx] = NULL;
1695         }
1696
1697         /* First allocate the RX queue data structure. */
1698         rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1699                           RTE_CACHE_LINE_SIZE);
1700         if (rxq == NULL)
1701                 return -ENOMEM;
1702         rxq->offloads = rx_conf->offloads;
1703         rxq->mb_pool = mp;
1704         rxq->nb_rx_desc = nb_desc;
1705         rxq->pthresh = rx_conf->rx_thresh.pthresh;
1706         rxq->hthresh = rx_conf->rx_thresh.hthresh;
1707         rxq->wthresh = rx_conf->rx_thresh.wthresh;
1708         if (rxq->wthresh > 0 &&
1709             (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1710                 rxq->wthresh = 1;
1711         rxq->drop_en = rx_conf->rx_drop_en;
1712         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1713         rxq->queue_id = queue_idx;
1714         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1715                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1716         rxq->port_id = dev->data->port_id;
1717         rxq->crc_len = (uint8_t)((dev->data->dev_conf.rxmode.offloads &
1718                         DEV_RX_OFFLOAD_CRC_STRIP) ? 0 : ETHER_CRC_LEN);
1719
1720         /*
1721          *  Allocate RX ring hardware descriptors. A memzone large enough to
1722          *  handle the maximum ring size is allocated in order to allow for
1723          *  resizing in later calls to the queue setup function.
1724          */
1725         size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1726         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1727                                       E1000_ALIGN, socket_id);
1728         if (rz == NULL) {
1729                 igb_rx_queue_release(rxq);
1730                 return -ENOMEM;
1731         }
1732         rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1733         rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1734         rxq->rx_ring_phys_addr = rz->iova;
1735         rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1736
1737         /* Allocate software ring. */
1738         rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1739                                    sizeof(struct igb_rx_entry) * nb_desc,
1740                                    RTE_CACHE_LINE_SIZE);
1741         if (rxq->sw_ring == NULL) {
1742                 igb_rx_queue_release(rxq);
1743                 return -ENOMEM;
1744         }
1745         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1746                      rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1747
1748         dev->data->rx_queues[queue_idx] = rxq;
1749         igb_reset_rx_queue(rxq);
1750
1751         return 0;
1752 }
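/*
 * Usage sketch (application side, hypothetical values) for the per-queue Rx
 * offloads handled above: queue-level offloads travel in rte_eth_rxconf, and
 * port-level bits must mirror rxmode.offloads to pass
 * igb_check_rx_queue_offloads():
 *
 *     struct rte_eth_rxconf rxconf = dev_info.default_rxconf;
 *
 *     rxconf.offloads = port_conf.rxmode.offloads;
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  &rxconf, mbuf_pool);
 *     if (ret < 0)
 *             rte_exit(EXIT_FAILURE, "rx queue setup failed\n");
 */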
1753
1754 uint32_t
1755 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1756 {
1757 #define IGB_RXQ_SCAN_INTERVAL 4
1758         volatile union e1000_adv_rx_desc *rxdp;
1759         struct igb_rx_queue *rxq;
1760         uint32_t desc = 0;
1761
1762         rxq = dev->data->rx_queues[rx_queue_id];
1763         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1764
1765         while ((desc < rxq->nb_rx_desc) &&
1766                 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1767                 desc += IGB_RXQ_SCAN_INTERVAL;
1768                 rxdp += IGB_RXQ_SCAN_INTERVAL;
1769                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1770                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
1771                                 desc - rxq->nb_rx_desc]);
1772         }
1773
1774         return desc;
1775 }
1776
1777 int
1778 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1779 {
1780         volatile union e1000_adv_rx_desc *rxdp;
1781         struct igb_rx_queue *rxq = rx_queue;
1782         uint32_t desc;
1783
1784         if (unlikely(offset >= rxq->nb_rx_desc))
1785                 return 0;
1786         desc = rxq->rx_tail + offset;
1787         if (desc >= rxq->nb_rx_desc)
1788                 desc -= rxq->nb_rx_desc;
1789
1790         rxdp = &rxq->rx_ring[desc];
1791         return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1792 }
1793
1794 int
1795 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1796 {
1797         struct igb_rx_queue *rxq = rx_queue;
1798         volatile uint32_t *status;
1799         uint32_t desc;
1800
1801         if (unlikely(offset >= rxq->nb_rx_desc))
1802                 return -EINVAL;
1803
1804         if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1805                 return RTE_ETH_RX_DESC_UNAVAIL;
1806
1807         desc = rxq->rx_tail + offset;
1808         if (desc >= rxq->nb_rx_desc)
1809                 desc -= rxq->nb_rx_desc;
1810
1811         status = &rxq->rx_ring[desc].wb.upper.status_error;
1812         if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1813                 return RTE_ETH_RX_DESC_DONE;
1814
1815         return RTE_ETH_RX_DESC_AVAIL;
1816 }
1817
1818 int
1819 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1820 {
1821         struct igb_tx_queue *txq = tx_queue;
1822         volatile uint32_t *status;
1823         uint32_t desc;
1824
1825         if (unlikely(offset >= txq->nb_tx_desc))
1826                 return -EINVAL;
1827
1828         desc = txq->tx_tail + offset;
1829         if (desc >= txq->nb_tx_desc)
1830                 desc -= txq->nb_tx_desc;
1831
1832         status = &txq->tx_ring[desc].wb.status;
1833         if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1834                 return RTE_ETH_TX_DESC_DONE;
1835
1836         return RTE_ETH_TX_DESC_FULL;
1837 }
1838
1839 void
1840 igb_dev_clear_queues(struct rte_eth_dev *dev)
1841 {
1842         uint16_t i;
1843         struct igb_tx_queue *txq;
1844         struct igb_rx_queue *rxq;
1845
1846         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1847                 txq = dev->data->tx_queues[i];
1848                 if (txq != NULL) {
1849                         igb_tx_queue_release_mbufs(txq);
1850                         igb_reset_tx_queue(txq, dev);
1851                 }
1852         }
1853
1854         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1855                 rxq = dev->data->rx_queues[i];
1856                 if (rxq != NULL) {
1857                         igb_rx_queue_release_mbufs(rxq);
1858                         igb_reset_rx_queue(rxq);
1859                 }
1860         }
1861 }
1862
1863 void
1864 igb_dev_free_queues(struct rte_eth_dev *dev)
1865 {
1866         uint16_t i;
1867
1868         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1869                 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1870                 dev->data->rx_queues[i] = NULL;
1871         }
1872         dev->data->nb_rx_queues = 0;
1873
1874         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1875                 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1876                 dev->data->tx_queues[i] = NULL;
1877         }
1878         dev->data->nb_tx_queues = 0;
1879 }
1880
1881 /**
1882  * Receive Side Scaling (RSS).
1883  * See section 7.1.1.7 in the following document:
1884  *     "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1885  *
1886  * Principles:
1887  * The source and destination IP addresses of the IP header and the source and
1888  * destination ports of TCP/UDP headers, if any, of received packets are hashed
1889  * against a configurable random key to compute a 32-bit RSS hash result.
1890  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1891  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
1892  * RSS output index which is used as the RX queue index where to store the
1893  * RSS output index, which is used as the RX queue index in which to store the
1894  * The following output is supplied in the RX write-back descriptor:
1895  *     - 32-bit result of the Microsoft RSS hash function,
1896  *     - 4-bit RSS type field.
1897  */
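/*
 * Worked example (hash value assumed): for a 32-bit RSS hash of 0x12345678,
 * the 7 LSBs are 0x78 = 120, so RETA entry 120 selects the destination
 * queue; with 4 Rx queues and the default table programmed by
 * igb_rss_configure() below, that entry holds queue 120 % 4 = 0.
 */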
1898
1899 /*
1900  * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1901  * Used as the default key.
1902  */
1903 static uint8_t rss_intel_key[40] = {
1904         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1905         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1906         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1907         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1908         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1909 };
1910
1911 static void
1912 igb_rss_disable(struct rte_eth_dev *dev)
1913 {
1914         struct e1000_hw *hw;
1915         uint32_t mrqc;
1916
1917         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1918         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1919         mrqc &= ~E1000_MRQC_ENABLE_MASK;
1920         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1921 }
1922
1923 static void
1924 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1925 {
1926         uint8_t  *hash_key;
1927         uint32_t rss_key;
1928         uint32_t mrqc;
1929         uint64_t rss_hf;
1930         uint16_t i;
1931
1932         hash_key = rss_conf->rss_key;
1933         if (hash_key != NULL) {
1934                 /* Fill in RSS hash key */
1935                 for (i = 0; i < 10; i++) {
1936                         rss_key  = hash_key[(i * 4)];
1937                         rss_key |= hash_key[(i * 4) + 1] << 8;
1938                         rss_key |= hash_key[(i * 4) + 2] << 16;
1939                         rss_key |= hash_key[(i * 4) + 3] << 24;
1940                         E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1941                 }
1942         }
1943
1944         /* Set configured hashing protocols in MRQC register */
1945         rss_hf = rss_conf->rss_hf;
1946         mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1947         if (rss_hf & ETH_RSS_IPV4)
1948                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1949         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1950                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1951         if (rss_hf & ETH_RSS_IPV6)
1952                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1953         if (rss_hf & ETH_RSS_IPV6_EX)
1954                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1955         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1956                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1957         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1958                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1959         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1960                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1961         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1962                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1963         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1964                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1965         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1966 }
1967
1968 int
1969 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1970                         struct rte_eth_rss_conf *rss_conf)
1971 {
1972         struct e1000_hw *hw;
1973         uint32_t mrqc;
1974         uint64_t rss_hf;
1975
1976         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1977
1978         /*
1979          * Before changing anything, first check that the update RSS operation
1980          * does not attempt to disable RSS, if RSS was enabled at
1981          * initialization time, or does not attempt to enable RSS, if RSS was
1982          * disabled at initialization time.
1983          */
1984         rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1985         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1986         if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
1987                 if (rss_hf != 0) /* Enable RSS */
1988                         return -(EINVAL);
1989                 return 0; /* Nothing to do */
1990         }
1991         /* RSS enabled */
1992         if (rss_hf == 0) /* Disable RSS */
1993                 return -(EINVAL);
1994         igb_hw_rss_hash_set(hw, rss_conf);
1995         return 0;
1996 }
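/*
 * Usage sketch (application side, hash functions assumed): the update above
 * is reached through the generic ethdev API, e.g.
 *
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = NULL,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *
 *     ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 *
 * A NULL rss_key keeps the key already programmed into the RSSRK registers;
 * the call fails with -EINVAL if it would toggle RSS on or off relative to
 * the state chosen at initialization time.
 */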
1997
1998 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
1999                               struct rte_eth_rss_conf *rss_conf)
2000 {
2001         struct e1000_hw *hw;
2002         uint8_t *hash_key;
2003         uint32_t rss_key;
2004         uint32_t mrqc;
2005         uint64_t rss_hf;
2006         uint16_t i;
2007
2008         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2009         hash_key = rss_conf->rss_key;
2010         if (hash_key != NULL) {
2011                 /* Return RSS hash key */
2012                 for (i = 0; i < 10; i++) {
2013                         rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2014                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2015                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2016                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2017                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2018                 }
2019         }
2020
2021         /* Get RSS functions configured in MRQC register */
2022         mrqc = E1000_READ_REG(hw, E1000_MRQC);
2023         if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2024                 rss_conf->rss_hf = 0;
2025                 return 0;
2026         }
2027         rss_hf = 0;
2028         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2029                 rss_hf |= ETH_RSS_IPV4;
2030         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2031                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2032         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2033                 rss_hf |= ETH_RSS_IPV6;
2034         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2035                 rss_hf |= ETH_RSS_IPV6_EX;
2036         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2037                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2038         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2039                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2040         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2041                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2042         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2043                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2044         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2045                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2046         rss_conf->rss_hf = rss_hf;
2047         return 0;
2048 }
2049
2050 static void
2051 igb_rss_configure(struct rte_eth_dev *dev)
2052 {
2053         struct rte_eth_rss_conf rss_conf;
2054         struct e1000_hw *hw;
2055         uint32_t shift;
2056         uint16_t i;
2057
2058         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2059
2060         /* Fill in redirection table. */
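        /*
         * Each 32-bit RETA register packs four one-byte entries, so one
         * register is written every fourth iteration; with N Rx queues the
         * table simply cycles through 0, 1, ..., N - 1. On 82575 the 3-bit
         * queue index sits in the upper bits of each byte, hence the shift
         * of 6.
         */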
2061         shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2062         for (i = 0; i < 128; i++) {
2063                 union e1000_reta {
2064                         uint32_t dword;
2065                         uint8_t  bytes[4];
2066                 } reta;
2067                 uint8_t q_idx;
2068
2069                 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2070                                    i % dev->data->nb_rx_queues : 0);
2071                 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2072                 if ((i & 3) == 3)
2073                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2074         }
2075
2076         /*
2077          * Configure the RSS key and the RSS protocols used to compute
2078          * the RSS hash of input packets.
2079          */
2080         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2081         if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2082                 igb_rss_disable(dev);
2083                 return;
2084         }
2085         if (rss_conf.rss_key == NULL)
2086                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2087         igb_hw_rss_hash_set(hw, &rss_conf);
2088 }
2089
2090 /*
2091  * Check whether the MAC type supports VMDq.
2092  * Return 1 if it does, otherwise return 0.
2093  */
2094 static int
2095 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2096 {
2097         const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2098
2099         switch (hw->mac.type) {
2100         case e1000_82576:
2101         case e1000_82580:
2102         case e1000_i350:
2103                 return 1;
2104         case e1000_82540:
2105         case e1000_82541:
2106         case e1000_82542:
2107         case e1000_82543:
2108         case e1000_82544:
2109         case e1000_82545:
2110         case e1000_82546:
2111         case e1000_82547:
2112         case e1000_82571:
2113         case e1000_82572:
2114         case e1000_82573:
2115         case e1000_82574:
2116         case e1000_82583:
2117         case e1000_i210:
2118         case e1000_i211:
2119         default:
2120                 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2121                 return 0;
2122         }
2123 }
2124
2125 static int
2126 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2127 {
2128         struct rte_eth_vmdq_rx_conf *cfg;
2129         struct e1000_hw *hw;
2130         uint32_t mrqc, vt_ctl, vmolr, rctl;
2131         int i;
2132
2133         PMD_INIT_FUNC_TRACE();
2134
2135         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2136         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2137
2138         /* Check whether the MAC type supports VMDq; a return value of 0 means it does not */
2139         if (igb_is_vmdq_supported(dev) == 0)
2140                 return -1;
2141
2142         igb_rss_disable(dev);
2143
2144         /* RCTL: enable VLAN filter */
2145         rctl = E1000_READ_REG(hw, E1000_RCTL);
2146         rctl |= E1000_RCTL_VFE;
2147         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2148
2149         /* MRQC: enable vmdq */
2150         mrqc = E1000_READ_REG(hw, E1000_MRQC);
2151         mrqc |= E1000_MRQC_ENABLE_VMDQ;
2152         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2153
2154         /* VTCTL:  pool selection according to VLAN tag */
2155         vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2156         if (cfg->enable_default_pool)
2157                 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2158         vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2159         E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2160
2161         for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2162                 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2163                 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2164                         E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2165                         E1000_VMOLR_MPME);
2166
2167                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2168                         vmolr |= E1000_VMOLR_AUPE;
2169                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2170                         vmolr |= E1000_VMOLR_ROMPE;
2171                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2172                         vmolr |= E1000_VMOLR_ROPE;
2173                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2174                         vmolr |= E1000_VMOLR_BAM;
2175                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2176                         vmolr |= E1000_VMOLR_MPME;
2177
2178                 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2179         }
2180
2181         /*
2182          * VMOLR: set STRVLAN to 1 if IGMAC in VT_CTL is set to 1.
2183          * Both 82576 and 82580 support it.
2184          */
2185         if (hw->mac.type != e1000_i350) {
2186                 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2187                         vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2188                         vmolr |= E1000_VMOLR_STRVLAN;
2189                         E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2190                 }
2191         }
2192
2193         /* VFTA - enable all vlan filters */
2194         for (i = 0; i < IGB_VFTA_SIZE; i++)
2195                 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2196
2197         /* VFRE: enable 8 pools for Rx; both 82576 and i350 support it */
2198         if (hw->mac.type != e1000_82580)
2199                 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2200
2201         /*
2202          * RAH/RAL - allow pools to read specific mac addresses
2203          * In this case, all pools should be able to read from mac addr 0
2204          */
2205         E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2206         E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2207
2208         /* VLVF: set up filters for vlan tags as configured */
2209         for (i = 0; i < cfg->nb_pool_maps; i++) {
2210                 /* set vlan id in VF register and set the valid bit */
2211                 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2212                         (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2213                         ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2214                         E1000_VLVF_POOLSEL_MASK)));
2215         }
2216
2217         E1000_WRITE_FLUSH(hw);
2218
2219         return 0;
2220 }
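/*
 * Configuration sketch (hypothetical VLAN IDs and pool assignments) for the
 * VMDq path above, supplied before rte_eth_dev_configure():
 *
 *     struct rte_eth_conf port_conf = { 0 };
 *     struct rte_eth_vmdq_rx_conf *vmdq =
 *             &port_conf.rx_adv_conf.vmdq_rx_conf;
 *
 *     port_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *     vmdq->nb_queue_pools = ETH_8_POOLS;
 *     vmdq->rx_mode = ETH_VMDQ_ACCEPT_UNTAG | ETH_VMDQ_ACCEPT_BROADCAST;
 *     vmdq->nb_pool_maps = 2;
 *     vmdq->pool_map[0].vlan_id = 10;
 *     vmdq->pool_map[0].pools = 1 << 1;
 *     vmdq->pool_map[1].vlan_id = 20;
 *     vmdq->pool_map[1].pools = 1 << 2;
 *
 * This maps VLAN 10 to pool 1 and VLAN 20 to pool 2, with untagged and
 * broadcast frames accepted by every pool (see the VMOLR programming above).
 */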
2221
2222
2223 /*********************************************************************
2224  *
2225  *  Enable receive unit.
2226  *
2227  **********************************************************************/
2228
2229 static int
2230 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2231 {
2232         struct igb_rx_entry *rxe = rxq->sw_ring;
2233         uint64_t dma_addr;
2234         unsigned i;
2235
2236         /* Initialize software ring entries. */
2237         for (i = 0; i < rxq->nb_rx_desc; i++) {
2238                 volatile union e1000_adv_rx_desc *rxd;
2239                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2240
2241                 if (mbuf == NULL) {
2242                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2243                                      "queue_id=%hu", rxq->queue_id);
2244                         return -ENOMEM;
2245                 }
2246                 dma_addr =
2247                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2248                 rxd = &rxq->rx_ring[i];
2249                 rxd->read.hdr_addr = 0;
2250                 rxd->read.pkt_addr = dma_addr;
2251                 rxe[i].mbuf = mbuf;
2252         }
2253
2254         return 0;
2255 }
2256
2257 #define E1000_MRQC_DEF_Q_SHIFT               (3)
2258 static int
2259 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2260 {
2261         struct e1000_hw *hw =
2262                 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2263         uint32_t mrqc;
2264
2265         if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2266                 /*
2267                  * SRIOV active scheme
2268                  * FIXME: add support for RSS together with VMDq & SR-IOV
2269                  */
2270                 mrqc = E1000_MRQC_ENABLE_VMDQ;
2271                 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2272                 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2273                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2274         } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2275                 /*
2276                  * SRIOV inactive scheme
2277                  */
2278                 switch (dev->data->dev_conf.rxmode.mq_mode) {
2279                         case ETH_MQ_RX_RSS:
2280                                 igb_rss_configure(dev);
2281                                 break;
2282                         case ETH_MQ_RX_VMDQ_ONLY:
2283                                 /* Configure general VMDq-only RX parameters */
2284                                 igb_vmdq_rx_hw_configure(dev);
2285                                 break;
2286                         case ETH_MQ_RX_NONE:
2287                                 /* If mq_mode is none, disable RSS. */
2288                         default:
2289                                 igb_rss_disable(dev);
2290                                 break;
2291                 }
2292         }
2293
2294         return 0;
2295 }
2296
2297 int
2298 eth_igb_rx_init(struct rte_eth_dev *dev)
2299 {
2300         struct rte_eth_rxmode *rxmode;
2301         struct e1000_hw     *hw;
2302         struct igb_rx_queue *rxq;
2303         uint32_t rctl;
2304         uint32_t rxcsum;
2305         uint32_t srrctl;
2306         uint16_t buf_size;
2307         uint16_t rctl_bsize;
2308         uint16_t i;
2309         int ret;
2310
2311         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2312         srrctl = 0;
2313
2314         /*
2315          * Make sure receives are disabled while setting
2316          * up the descriptor ring.
2317          */
2318         rctl = E1000_READ_REG(hw, E1000_RCTL);
2319         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2320
2321         rxmode = &dev->data->dev_conf.rxmode;
2322
2323         /*
2324          * Configure support of jumbo frames, if any.
2325          */
2326         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
2327                 rctl |= E1000_RCTL_LPE;
2328
2329                 /*
2330                  * Set the maximum packet length by default; it may be updated
2331                  * later when dual VLAN is enabled or disabled.
2332                  */
2333                 E1000_WRITE_REG(hw, E1000_RLPML,
2334                         dev->data->dev_conf.rxmode.max_rx_pkt_len +
2335                                                 VLAN_TAG_SIZE);
2336         } else
2337                 rctl &= ~E1000_RCTL_LPE;
2338
2339         /* Configure and enable each RX queue. */
2340         rctl_bsize = 0;
2341         dev->rx_pkt_burst = eth_igb_recv_pkts;
2342         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2343                 uint64_t bus_addr;
2344                 uint32_t rxdctl;
2345
2346                 rxq = dev->data->rx_queues[i];
2347
2348                 rxq->flags = 0;
2349                 /*
2350                  * i350 and i354 vlan packets have vlan tags byte swapped.
2351                  */
2352                 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2353                         rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2354                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2355                 } else {
2356                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2357                 }
2358
2359                 /* Allocate buffers for descriptor rings and set up queue */
2360                 ret = igb_alloc_rx_queue_mbufs(rxq);
2361                 if (ret)
2362                         return ret;
2363
2364                 /*
2365                  * Reset crc_len in case it was changed after queue setup by a
2366                  *  call to configure
2367                  */
2368                 rxq->crc_len = (uint8_t)(dev->data->dev_conf.rxmode.offloads &
2369                                 DEV_RX_OFFLOAD_CRC_STRIP ? 0 : ETHER_CRC_LEN);
2370
2371                 bus_addr = rxq->rx_ring_phys_addr;
2372                 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2373                                 rxq->nb_rx_desc *
2374                                 sizeof(union e1000_adv_rx_desc));
2375                 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2376                                 (uint32_t)(bus_addr >> 32));
2377                 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2378
2379                 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2380
2381                 /*
2382                  * Configure RX buffer size.
2383                  */
2384                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2385                         RTE_PKTMBUF_HEADROOM);
2386                 if (buf_size >= 1024) {
2387                         /*
2388                          * Configure the BSIZEPACKET field of the SRRCTL
2389                          * register of the queue.
2390                          * Value is in 1 KB resolution, from 1 KB to 127 KB.
2391                          * If this field is equal to 0b, then RCTL.BSIZE
2392                          * determines the RX packet buffer size.
2393                          */
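                        /*
                         * Worked example (mempool data room assumed): a
                         * 2048-byte buffer gives 2048 >> 10 = 2, i.e. a 2 KB
                         * packet buffer; reading the field back below yields
                         * the effective size in whole KB, so 2176 bytes would
                         * also end up as 2048.
                         */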
2394                         srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2395                                    E1000_SRRCTL_BSIZEPKT_MASK);
2396                         buf_size = (uint16_t) ((srrctl &
2397                                                 E1000_SRRCTL_BSIZEPKT_MASK) <<
2398                                                E1000_SRRCTL_BSIZEPKT_SHIFT);
2399
2400                         /* Add dual VLAN tag length to support dual VLAN */
2401                         if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2402                                                 2 * VLAN_TAG_SIZE) > buf_size){
2403                                 if (!dev->data->scattered_rx)
2404                                         PMD_INIT_LOG(DEBUG,
2405                                                      "forcing scatter mode");
2406                                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2407                                 dev->data->scattered_rx = 1;
2408                         }
2409                 } else {
2410                         /*
2411                          * Use BSIZE field of the device RCTL register.
2412                          */
2413                         if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2414                                 rctl_bsize = buf_size;
2415                         if (!dev->data->scattered_rx)
2416                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2417                         dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2418                         dev->data->scattered_rx = 1;
2419                 }
2420
2421                 /* Set if packets are dropped when no descriptors available */
2422                 if (rxq->drop_en)
2423                         srrctl |= E1000_SRRCTL_DROP_EN;
2424
2425                 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2426
2427                 /* Enable this RX queue. */
2428                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2429                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2430                 rxdctl &= 0xFFF00000;
2431                 rxdctl |= (rxq->pthresh & 0x1F);
2432                 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2433                 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2434                 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2435         }
2436
2437         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2438                 if (!dev->data->scattered_rx)
2439                         PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2440                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2441                 dev->data->scattered_rx = 1;
2442         }
2443
2444         /*
2445          * Setup BSIZE field of RCTL register, if needed.
2446          * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
2447          * register, since the code above configures the SRRCTL register of
2448          * the RX queue in such a case.
2449          * All configurable sizes are:
2450          * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2451          *  8192: rctl |= (E1000_RCTL_SZ_8192  | E1000_RCTL_BSEX);
2452          *  4096: rctl |= (E1000_RCTL_SZ_4096  | E1000_RCTL_BSEX);
2453          *  2048: rctl |= E1000_RCTL_SZ_2048;
2454          *  1024: rctl |= E1000_RCTL_SZ_1024;
2455          *   512: rctl |= E1000_RCTL_SZ_512;
2456          *   256: rctl |= E1000_RCTL_SZ_256;
2457          */
2458         if (rctl_bsize > 0) {
2459                 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2460                         rctl |= E1000_RCTL_SZ_512;
2461                 else /* 256 <= buf_size < 512 - use 256 */
2462                         rctl |= E1000_RCTL_SZ_256;
2463         }
2464
2465         /*
2466          * Configure RSS if device configured with multiple RX queues.
2467          */
2468         igb_dev_mq_rx_configure(dev);
2469
2470         /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2471         rctl |= E1000_READ_REG(hw, E1000_RCTL);
2472
2473         /*
2474          * Setup the Checksum Register.
2475          * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2476          */
2477         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2478         rxcsum |= E1000_RXCSUM_PCSD;
2479
2480         /* Enable both L3/L4 rx checksum offload */
2481         if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
2482                 rxcsum |= E1000_RXCSUM_IPOFL;
2483         else
2484                 rxcsum &= ~E1000_RXCSUM_IPOFL;
2485         if (rxmode->offloads &
2486                 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM))
2487                 rxcsum |= E1000_RXCSUM_TUOFL;
2488         else
2489                 rxcsum &= ~E1000_RXCSUM_TUOFL;
2490         if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
2491                 rxcsum |= E1000_RXCSUM_CRCOFL;
2492         else
2493                 rxcsum &= ~E1000_RXCSUM_CRCOFL;
2494
2495         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2496
2497         /* Setup the Receive Control Register. */
2498         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
2499                 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2500
2501                 /* set STRCRC bit in all queues */
2502                 if (hw->mac.type == e1000_i350 ||
2503                     hw->mac.type == e1000_i210 ||
2504                     hw->mac.type == e1000_i211 ||
2505                     hw->mac.type == e1000_i354) {
2506                         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2507                                 rxq = dev->data->rx_queues[i];
2508                                 uint32_t dvmolr = E1000_READ_REG(hw,
2509                                         E1000_DVMOLR(rxq->reg_idx));
2510                                 dvmolr |= E1000_DVMOLR_STRCRC;
2511                                 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2512                         }
2513                 }
2514         } else {
2515                 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2516
2517                 /* clear STRCRC bit in all queues */
2518                 if (hw->mac.type == e1000_i350 ||
2519                     hw->mac.type == e1000_i210 ||
2520                     hw->mac.type == e1000_i211 ||
2521                     hw->mac.type == e1000_i354) {
2522                         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2523                                 rxq = dev->data->rx_queues[i];
2524                                 uint32_t dvmolr = E1000_READ_REG(hw,
2525                                         E1000_DVMOLR(rxq->reg_idx));
2526                                 dvmolr &= ~E1000_DVMOLR_STRCRC;
2527                                 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2528                         }
2529                 }
2530         }
2531
2532         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2533         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2534                 E1000_RCTL_RDMTS_HALF |
2535                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2536
2537         /* Make sure VLAN Filters are off. */
2538         if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2539                 rctl &= ~E1000_RCTL_VFE;
2540         /* Don't store bad packets. */
2541         rctl &= ~E1000_RCTL_SBP;
2542
2543         /* Enable Receives. */
2544         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2545
2546         /*
2547          * Setup the HW Rx Head and Tail Descriptor Pointers.
2548          * This needs to be done after enable.
2549          */
2550         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2551                 rxq = dev->data->rx_queues[i];
2552                 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2553                 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2554         }
2555
2556         return 0;
2557 }
2558
2559 /*********************************************************************
2560  *
2561  *  Enable transmit unit.
2562  *
2563  **********************************************************************/
2564 void
2565 eth_igb_tx_init(struct rte_eth_dev *dev)
2566 {
2567         struct e1000_hw     *hw;
2568         struct igb_tx_queue *txq;
2569         uint32_t tctl;
2570         uint32_t txdctl;
2571         uint16_t i;
2572
2573         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2574
2575         /* Setup the Base and Length of the Tx Descriptor Rings. */
2576         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2577                 uint64_t bus_addr;
2578                 txq = dev->data->tx_queues[i];
2579                 bus_addr = txq->tx_ring_phys_addr;
2580
2581                 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2582                                 txq->nb_tx_desc *
2583                                 sizeof(union e1000_adv_tx_desc));
2584                 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2585                                 (uint32_t)(bus_addr >> 32));
2586                 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2587
2588                 /* Setup the HW Tx Head and Tail descriptor pointers. */
2589                 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2590                 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2591
2592                 /* Setup Transmit threshold registers. */
2593                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2594                 txdctl |= txq->pthresh & 0x1F;
2595                 txdctl |= ((txq->hthresh & 0x1F) << 8);
2596                 txdctl |= ((txq->wthresh & 0x1F) << 16);
2597                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2598                 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2599         }
2600
2601         /* Program the Transmit Control Register. */
2602         tctl = E1000_READ_REG(hw, E1000_TCTL);
2603         tctl &= ~E1000_TCTL_CT;
2604         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2605                  (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2606
2607         e1000_config_collision_dist(hw);
2608
2609         /* This write will effectively turn on the transmit unit. */
2610         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2611 }
2612
2613 /*********************************************************************
2614  *
2615  *  Enable VF receive unit.
2616  *
2617  **********************************************************************/
2618 int
2619 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2620 {
2621         struct e1000_hw     *hw;
2622         struct igb_rx_queue *rxq;
2623         uint32_t srrctl;
2624         uint16_t buf_size;
2625         uint16_t rctl_bsize;
2626         uint16_t i;
2627         int ret;
2628
2629         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2630
2631         /* setup MTU */
2632         e1000_rlpml_set_vf(hw,
2633                 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2634                 VLAN_TAG_SIZE));
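        /*
         * For example, a max_rx_pkt_len of 1518 bytes (ETHER_MAX_LEN) plus a
         * 4-byte VLAN_TAG_SIZE programs a maximum receive packet length of
         * 1522 bytes.
         */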
2635
2636         /* Configure and enable each RX queue. */
2637         rctl_bsize = 0;
2638         dev->rx_pkt_burst = eth_igb_recv_pkts;
2639         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2640                 uint64_t bus_addr;
2641                 uint32_t rxdctl;
2642
2643                 rxq = dev->data->rx_queues[i];
2644
2645                 rxq->flags = 0;
2646                 /*
2647                  * i350 VF loopback (LB) VLAN packets have their VLAN tags byte-swapped.
2648                  */
2649                 if (hw->mac.type == e1000_vfadapt_i350) {
2650                         rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2651                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2652                 } else {
2653                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2654                 }
2655
2656                 /* Allocate buffers for descriptor rings and set up queue */
2657                 ret = igb_alloc_rx_queue_mbufs(rxq);
2658                 if (ret)
2659                         return ret;
2660
2661                 bus_addr = rxq->rx_ring_phys_addr;
2662                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2663                                 rxq->nb_rx_desc *
2664                                 sizeof(union e1000_adv_rx_desc));
2665                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2666                                 (uint32_t)(bus_addr >> 32));
2667                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2668
2669                 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2670
2671                 /*
2672                  * Configure RX buffer size.
2673                  */
2674                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2675                         RTE_PKTMBUF_HEADROOM);
2676                 if (buf_size >= 1024) {
2677                         /*
2678                          * Configure the BSIZEPACKET field of the SRRCTL
2679                          * register of the queue.
2680                          * Value is in 1 KB resolution, from 1 KB to 127 KB.
2681                          * If this field is equal to 0b, then RCTL.BSIZE
2682                          * determines the RX packet buffer size.
2683                          */
2684                         srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2685                                    E1000_SRRCTL_BSIZEPKT_MASK);
2686                         buf_size = (uint16_t) ((srrctl &
2687                                                 E1000_SRRCTL_BSIZEPKT_MASK) <<
2688                                                E1000_SRRCTL_BSIZEPKT_SHIFT);
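                        /*
                         * For example, a mempool with a 2048-byte data room
                         * and the default 128-byte RTE_PKTMBUF_HEADROOM gives
                         * buf_size = 1920; 1920 >> 10 = 1, so BSIZEPACKET is
                         * programmed to 1 and the effective buffer size rounds
                         * down to 1 << 10 = 1024 bytes.
                         */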
2689
2690                         /* Add dual VLAN tag length to support double-tagged (QinQ) frames */
2691                         if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2692                                                 2 * VLAN_TAG_SIZE) > buf_size) {
2693                                 if (!dev->data->scattered_rx)
2694                                         PMD_INIT_LOG(DEBUG,
2695                                                      "forcing scatter mode");
2696                                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2697                                 dev->data->scattered_rx = 1;
2698                         }
2699                 } else {
2700                         /*
2701                          * Use BSIZE field of the device RCTL register.
2702                          */
2703                         if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2704                                 rctl_bsize = buf_size;
2705                         if (!dev->data->scattered_rx)
2706                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2707                         dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2708                         dev->data->scattered_rx = 1;
2709                 }
2710
2711                 /* Drop packets when no RX descriptors are available, if requested */
2712                 if (rxq->drop_en)
2713                         srrctl |= E1000_SRRCTL_DROP_EN;
2714
2715                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2716
2717                 /* Enable this RX queue. */
2718                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2719                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2720                 rxdctl &= 0xFFF00000;
2721                 rxdctl |= (rxq->pthresh & 0x1F);
2722                 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2723                 if (hw->mac.type == e1000_vfadapt) {
2724                         /*
2725                          * Workaround for the 82576 VF erratum:
2726                          * force WTHRESH to 1 to avoid descriptor
2727                          * write-back occasionally not being triggered.
2728                          */
2729                         rxdctl |= 0x10000;
2730                         PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1!");
2731                 } else
2733                         rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2734                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2735         }
2736
2737         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2738                 if (!dev->data->scattered_rx)
2739                         PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2740                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2741                 dev->data->scattered_rx = 1;
2742         }
2743
2744         /*
2745          * Setup the HW Rx Head and Tail Descriptor Pointers.
2746          * This needs to be done after enable.
2747          */
2748         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2749                 rxq = dev->data->rx_queues[i];
2750                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2751                 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2752         }
2753
2754         return 0;
2755 }
2756
2757 /*********************************************************************
2758  *
2759  *  Enable VF transmit unit.
2760  *
2761  **********************************************************************/
2762 void
2763 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2764 {
2765         struct e1000_hw     *hw;
2766         struct igb_tx_queue *txq;
2767         uint32_t txdctl;
2768         uint16_t i;
2769
2770         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2771
2772         /* Setup the Base and Length of the Tx Descriptor Rings. */
2773         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2774                 uint64_t bus_addr;
2775
2776                 txq = dev->data->tx_queues[i];
2777                 bus_addr = txq->tx_ring_phys_addr;
2778                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2779                                 txq->nb_tx_desc *
2780                                 sizeof(union e1000_adv_tx_desc));
2781                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2782                                 (uint32_t)(bus_addr >> 32));
2783                 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2784
2785                 /* Setup the HW Tx Head and Tail descriptor pointers. */
2786                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2787                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2788
2789                 /* Setup Transmit threshold registers. */
2790                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2791                 txdctl |= txq->pthresh & 0x1F;
2792                 txdctl |= ((txq->hthresh & 0x1F) << 8);
2793                 if (hw->mac.type == e1000_82576) {
2794                         /*
2795                          * Workaround for the 82576 VF erratum:
2796                          * force WTHRESH to 1 to avoid descriptor
2797                          * write-back occasionally not being triggered.
2798                          */
2799                         txdctl |= 0x10000;
2800                         PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1!");
2801                 } else
2803                         txdctl |= ((txq->wthresh & 0x1F) << 16);
2804                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2805                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2806         }
2808 }
2809
2810 void
2811 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2812         struct rte_eth_rxq_info *qinfo)
2813 {
2814         struct igb_rx_queue *rxq;
2815
2816         rxq = dev->data->rx_queues[queue_id];
2817
2818         qinfo->mp = rxq->mb_pool;
2819         qinfo->scattered_rx = dev->data->scattered_rx;
2820         qinfo->nb_desc = rxq->nb_rx_desc;
2821
2822         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2823         qinfo->conf.rx_drop_en = rxq->drop_en;
2824         qinfo->conf.offloads = rxq->offloads;
2825 }
2826
2827 void
2828 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2829         struct rte_eth_txq_info *qinfo)
2830 {
2831         struct igb_tx_queue *txq;
2832
2833         txq = dev->data->tx_queues[queue_id];
2834
2835         qinfo->nb_desc = txq->nb_tx_desc;
2836
2837         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2838         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2839         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2840 }
2841
2842 int
2843 igb_config_rss_filter(struct rte_eth_dev *dev,
2844                 struct igb_rte_flow_rss_conf *conf, bool add)
2845 {
2846         uint32_t shift;
2847         uint16_t i, j;
2848         struct rte_eth_rss_conf rss_conf = conf->rss_conf;
2849         struct e1000_filter_info *filter_info =
2850                 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2851         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2852
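        /*
         * Removal only succeeds when the requested configuration matches the
         * one previously stored; addition fails if an RSS configuration has
         * already been programmed through this filter.
         */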
2855         if (!add) {
2856                 if (memcmp(conf, &filter_info->rss_info,
2857                         sizeof(struct igb_rte_flow_rss_conf)) == 0) {
2858                         igb_rss_disable(dev);
2859                         memset(&filter_info->rss_info, 0,
2860                                 sizeof(struct igb_rte_flow_rss_conf));
2861                         return 0;
2862                 }
2863                 return -EINVAL;
2864         }
2865
2866         if (filter_info->rss_info.num)
2867                 return -EINVAL;
2868
2869         /* Fill in redirection table. */
2870         shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2871         for (i = 0, j = 0; i < 128; i++, j++) {
2872                 union e1000_reta {
2873                         uint32_t dword;
2874                         uint8_t  bytes[4];
2875                 } reta;
2876                 uint8_t q_idx;
2877
2878                 if (j == conf->num)
2879                         j = 0;
2880                 q_idx = conf->queue[j];
2881                 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2882                 if ((i & 3) == 3)
2883                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2884         }
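        /*
         * Each RETA register packs four one-byte entries, so the 128-entry
         * table is written as 32 dwords. With, for example, conf->num = 2 and
         * queue[] = {0, 1}, the table simply alternates between the two
         * queues; on 82575 each index is shifted left by 6 to land in the
         * field that hardware expects.
         */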
2885
2886         /* Configure the RSS key and the RSS protocols used to compute
2887          * the RSS hash of input packets.
2888          */
2889         if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2890                 igb_rss_disable(dev);
2891                 return 0;
2892         }
2893         if (rss_conf.rss_key == NULL)
2894                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2895         igb_hw_rss_hash_set(hw, &rss_conf);
2896
2897         rte_memcpy(&filter_info->rss_info,
2898                 conf, sizeof(struct igb_rte_flow_rss_conf));
2899
2900         return 0;
2901 }