net: add rte prefix to SCTP structure
dpdk.git: drivers/net/e1000/igb_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <inttypes.h>
14
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_pci.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
25 #include <rte_eal.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
32 #include <rte_mbuf.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev_driver.h>
35 #include <rte_prefetch.h>
36 #include <rte_udp.h>
37 #include <rte_tcp.h>
38 #include <rte_sctp.h>
39 #include <rte_net.h>
40 #include <rte_string_fns.h>
41
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
45
46 #ifdef RTE_LIBRTE_IEEE1588
47 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
48 #else
49 #define IGB_TX_IEEE1588_TMST 0
50 #endif
51 /* Bit mask indicating which bits are required for building the TX context */
52 #define IGB_TX_OFFLOAD_MASK (                    \
53                 PKT_TX_OUTER_IPV6 |      \
54                 PKT_TX_OUTER_IPV4 |      \
55                 PKT_TX_IPV6 |            \
56                 PKT_TX_IPV4 |            \
57                 PKT_TX_VLAN_PKT |                \
58                 PKT_TX_IP_CKSUM |                \
59                 PKT_TX_L4_MASK |                 \
60                 PKT_TX_TCP_SEG |                 \
61                 IGB_TX_IEEE1588_TMST)
62
63 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
64                 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
65
66 /**
67  * Structure associated with each descriptor of the RX ring of a RX queue.
68  */
69 struct igb_rx_entry {
70         struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
71 };
72
73 /**
74  * Structure associated with each descriptor of the TX ring of a TX queue.
75  */
76 struct igb_tx_entry {
77         struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
78         uint16_t next_id; /**< Index of next descriptor in ring. */
79         uint16_t last_id; /**< Index of last scattered descriptor. */
80 };
81
82 /**
83  * rx queue flags
84  */
85 enum igb_rxq_flags {
86         IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
87 };
88
89 /**
90  * Structure associated with each RX queue.
91  */
92 struct igb_rx_queue {
93         struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
94         volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
95         uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
96         volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
97         volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
98         struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
99         struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
100         struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
101         uint16_t            nb_rx_desc; /**< number of RX descriptors. */
102         uint16_t            rx_tail;    /**< current value of RDT register. */
103         uint16_t            nb_rx_hold; /**< number of held free RX desc. */
104         uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
105         uint16_t            queue_id;   /**< RX queue index. */
106         uint16_t            reg_idx;    /**< RX queue register index. */
107         uint16_t            port_id;    /**< Device port identifier. */
108         uint8_t             pthresh;    /**< Prefetch threshold register. */
109         uint8_t             hthresh;    /**< Host threshold register. */
110         uint8_t             wthresh;    /**< Write-back threshold register. */
111         uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
112         uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
113         uint32_t            flags;      /**< RX flags. */
114         uint64_t            offloads;   /**< offloads of DEV_RX_OFFLOAD_* */
115 };
116
117 /**
118  * Hardware context number
119  */
120 enum igb_advctx_num {
121         IGB_CTX_0    = 0, /**< CTX0    */
122         IGB_CTX_1    = 1, /**< CTX1    */
123         IGB_CTX_NUM  = 2, /**< CTX_NUM */
124 };
125
126 /** Offload features */
127 union igb_tx_offload {
128         uint64_t data;
129         struct {
130                 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
131                 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
132                 uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
133                 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
134                 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
135
136                 /* uint64_t unused:8; */
137         };
138 };
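/*
 * The bit fields above occupy the low 56 bits of .data (from bit 0 upward:
 * l3_len, l2_len, vlan_tci, l4_len, tso_segsz). The driver relies on the
 * compiler allocating bit fields from the least significant bit, and the
 * compare masks below select whole fields from this layout.
 */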
139
140 /*
141  * Compare mask for igb_tx_offload.data,
142  * should be in sync with igb_tx_offload layout.
143  */
144 #define TX_MACIP_LEN_CMP_MASK   0x000000000000FFFFULL /**< L2L3 header mask. */
145 #define TX_VLAN_CMP_MASK                0x00000000FFFF0000ULL /**< Vlan mask. */
146 #define TX_TCP_LEN_CMP_MASK             0x000000FF00000000ULL /**< TCP header mask. */
147 #define TX_TSO_MSS_CMP_MASK             0x00FFFF0000000000ULL /**< TSO segsz mask. */
148 /** Mac + IP + TCP + Mss mask. */
149 #define TX_TSO_CMP_MASK \
150         (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
151
152 /**
153  * Structure used to check whether a new context descriptor needs to be built
154  */
155 struct igb_advctx_info {
156         uint64_t flags;           /**< ol_flags related to context build. */
157         /** tx offload: vlan, tso, l2-l3-l4 lengths. */
158         union igb_tx_offload tx_offload;
159         /** compare mask for tx offload. */
160         union igb_tx_offload tx_offload_mask;
161 };
162
163 /**
164  * Structure associated with each TX queue.
165  */
166 struct igb_tx_queue {
167         volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
168         uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
169         struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
170         volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
171         uint32_t               txd_type;      /**< Device-specific TXD type */
172         uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
173         uint16_t               tx_tail; /**< Current value of TDT register. */
174         uint16_t               tx_head;
175         /**< Index of first used TX descriptor. */
176         uint16_t               queue_id; /**< TX queue index. */
177         uint16_t               reg_idx;  /**< TX queue register index. */
178         uint16_t               port_id;  /**< Device port identifier. */
179         uint8_t                pthresh;  /**< Prefetch threshold register. */
180         uint8_t                hthresh;  /**< Host threshold register. */
181         uint8_t                wthresh;  /**< Write-back threshold register. */
182         uint32_t               ctx_curr;
183         /**< Current used hardware descriptor. */
184         uint32_t               ctx_start;
185         /**< Start context position for transmit queue. */
186         struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
187         /**< Hardware context history.*/
188         uint64_t               offloads; /**< offloads of DEV_TX_OFFLOAD_* */
189 };
190
191 #if 1
192 #define RTE_PMD_USE_PREFETCH
193 #endif
194
195 #ifdef RTE_PMD_USE_PREFETCH
196 #define rte_igb_prefetch(p)     rte_prefetch0(p)
197 #else
198 #define rte_igb_prefetch(p)     do {} while(0)
199 #endif
200
201 #ifdef RTE_PMD_PACKET_PREFETCH
202 #define rte_packet_prefetch(p) rte_prefetch1(p)
203 #else
204 #define rte_packet_prefetch(p)  do {} while(0)
205 #endif
206
207 /*
208  * Macros for the VMDq feature and TSO limits for 1 GbE NICs.
209  */
210 #define E1000_VMOLR_SIZE                        (8)
211 #define IGB_TSO_MAX_HDRLEN                      (512)
212 #define IGB_TSO_MAX_MSS                         (9216)
213
214 /*********************************************************************
215  *
216  *  TX function
217  *
218  **********************************************************************/
219
220 /*
221  * There are some hardware limitations for TCP segmentation offload (TSO),
222  * so we should check whether the requested parameters are valid.
223  */
224 static inline uint64_t
225 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
226 {
227         if (!(ol_req & PKT_TX_TCP_SEG))
228                 return ol_req;
229         if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
230                         ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
231                 ol_req &= ~PKT_TX_TCP_SEG;
232                 ol_req |= PKT_TX_TCP_CKSUM;
233         }
234         return ol_req;
235 }
236
237 /*
238  * Advanced context descriptors are almost the same between igb and ixgbe.
239  * This is kept as a separate function; there may be an optimization opportunity here.
240  * Rework is required to go with the pre-defined values.
241  */
242
243 static inline void
244 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
245                 volatile struct e1000_adv_tx_context_desc *ctx_txd,
246                 uint64_t ol_flags, union igb_tx_offload tx_offload)
247 {
248         uint32_t type_tucmd_mlhl;
249         uint32_t mss_l4len_idx;
250         uint32_t ctx_idx, ctx_curr;
251         uint32_t vlan_macip_lens;
252         union igb_tx_offload tx_offload_mask;
253
254         ctx_curr = txq->ctx_curr;
255         ctx_idx = ctx_curr + txq->ctx_start;
256
257         tx_offload_mask.data = 0;
258         type_tucmd_mlhl = 0;
259
260         /* Specify which HW CTX to upload. */
261         mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
262
263         if (ol_flags & PKT_TX_VLAN_PKT)
264                 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
265
266         /* check if TCP segmentation required for this packet */
267         if (ol_flags & PKT_TX_TCP_SEG) {
268                 /* implies IP cksum in IPv4 */
269                 if (ol_flags & PKT_TX_IP_CKSUM)
270                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
271                                 E1000_ADVTXD_TUCMD_L4T_TCP |
272                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
273                 else
274                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
275                                 E1000_ADVTXD_TUCMD_L4T_TCP |
276                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
277
278                 tx_offload_mask.data |= TX_TSO_CMP_MASK;
279                 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
280                 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
281         } else { /* no TSO, check if hardware checksum is needed */
282                 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
283                         tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
284
285                 if (ol_flags & PKT_TX_IP_CKSUM)
286                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
287
288                 switch (ol_flags & PKT_TX_L4_MASK) {
289                 case PKT_TX_UDP_CKSUM:
290                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
291                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
292                         mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
293                         break;
294                 case PKT_TX_TCP_CKSUM:
295                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
296                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
297                         mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
298                         break;
299                 case PKT_TX_SCTP_CKSUM:
300                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
301                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
302                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
303                                 << E1000_ADVTXD_L4LEN_SHIFT;
304                         break;
305                 default:
306                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
307                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
308                         break;
309                 }
310         }
311
312         txq->ctx_cache[ctx_curr].flags = ol_flags;
313         txq->ctx_cache[ctx_curr].tx_offload.data =
314                 tx_offload_mask.data & tx_offload.data;
315         txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
316
317         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
318         vlan_macip_lens = (uint32_t)tx_offload.data;
319         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
320         ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
321         ctx_txd->seqnum_seed = 0;
322 }
323
324 /*
325  * Check which hardware context can be used. Use the existing match
326  * or create a new context descriptor.
327  */
328 static inline uint32_t
329 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
330                 union igb_tx_offload tx_offload)
331 {
332         /* If match with the current context */
333         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
334                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
335                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
336                         return txq->ctx_curr;
337         }
338
339         /* If match with the second context */
340         txq->ctx_curr ^= 1;
341         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
342                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
343                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
344                         return txq->ctx_curr;
345         }
346
347         /* Mismatch with both cached contexts: a new context descriptor must be built */
348         return IGB_CTX_NUM;
349 }
350
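/*
 * Translate checksum-related offload flags into the IXSM/TXSM "POPTS" bits
 * of the data descriptor olinfo_status field: TXSM for any L4 checksum or
 * TSO request, IXSM for an IPv4 header checksum request.
 */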
351 static inline uint32_t
352 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
353 {
354         static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
355         static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
356         uint32_t tmp;
357
358         tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
359         tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
360         tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
361         return tmp;
362 }
363
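/*
 * Translate VLAN insertion and TSO requests into the VLE and TSE command
 * bits of the data descriptor cmd_type_len field.
 */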
364 static inline uint32_t
365 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
366 {
367         uint32_t cmdtype;
368         static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
369         static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
370         cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
371         cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
372         return cmdtype;
373 }
374
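/*
 * Burst transmit: for each packet, set up an advanced context descriptor if
 * the requested offloads need one, then fill one data descriptor per segment
 * and finally advance the TDT register to the new tail position.
 */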
375 uint16_t
376 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
377                uint16_t nb_pkts)
378 {
379         struct igb_tx_queue *txq;
380         struct igb_tx_entry *sw_ring;
381         struct igb_tx_entry *txe, *txn;
382         volatile union e1000_adv_tx_desc *txr;
383         volatile union e1000_adv_tx_desc *txd;
384         struct rte_mbuf     *tx_pkt;
385         struct rte_mbuf     *m_seg;
386         uint64_t buf_dma_addr;
387         uint32_t olinfo_status;
388         uint32_t cmd_type_len;
389         uint32_t pkt_len;
390         uint16_t slen;
391         uint64_t ol_flags;
392         uint16_t tx_end;
393         uint16_t tx_id;
394         uint16_t tx_last;
395         uint16_t nb_tx;
396         uint64_t tx_ol_req;
397         uint32_t new_ctx = 0;
398         uint32_t ctx = 0;
399         union igb_tx_offload tx_offload = {0};
400
401         txq = tx_queue;
402         sw_ring = txq->sw_ring;
403         txr     = txq->tx_ring;
404         tx_id   = txq->tx_tail;
405         txe = &sw_ring[tx_id];
406
407         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
408                 tx_pkt = *tx_pkts++;
409                 pkt_len = tx_pkt->pkt_len;
410
411                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
412
413                 /*
414                  * The number of descriptors that must be allocated for a
415                  * packet is the number of segments of that packet, plus 1
416                  * Context Descriptor for the VLAN Tag Identifier, if any.
417                  * Determine the last TX descriptor to allocate in the TX ring
418                  * for the packet, starting from the current position (tx_id)
419                  * in the ring.
420                  */
421                 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
422
423                 ol_flags = tx_pkt->ol_flags;
424                 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
425
426                 /* Check whether a Context Descriptor needs to be built. */
427                 if (tx_ol_req) {
428                         tx_offload.l2_len = tx_pkt->l2_len;
429                         tx_offload.l3_len = tx_pkt->l3_len;
430                         tx_offload.l4_len = tx_pkt->l4_len;
431                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
432                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
433                         tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
434
435                         ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
436                         /* Only allocate a context descriptor if required. */
437                         new_ctx = (ctx == IGB_CTX_NUM);
438                         ctx = txq->ctx_curr + txq->ctx_start;
439                         tx_last = (uint16_t) (tx_last + new_ctx);
440                 }
441                 if (tx_last >= txq->nb_tx_desc)
442                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
443
444                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
445                            " tx_first=%u tx_last=%u",
446                            (unsigned) txq->port_id,
447                            (unsigned) txq->queue_id,
448                            (unsigned) pkt_len,
449                            (unsigned) tx_id,
450                            (unsigned) tx_last);
451
452                 /*
453                  * Check if there are enough free descriptors in the TX ring
454                  * to transmit the next packet.
455                  * This operation is based on the two following rules:
456                  *
457                  *   1- Only check that the last needed TX descriptor can be
458                  *      allocated (by construction, if that descriptor is free,
459                  *      all intermediate ones are also free).
460                  *
461                  *      For this purpose, the index of the last TX descriptor
462                  *      used for a packet (the "last descriptor" of a packet)
463                  *      is recorded in the TX entries (the last one included)
464                  *      that are associated with all TX descriptors allocated
465                  *      for that packet.
466                  *
467                  *   2- Avoid to allocate the last free TX descriptor of the
468                  *      ring, in order to never set the TDT register with the
469                  *      same value stored in parallel by the NIC in the TDH
470                  *      register, which makes the TX engine of the NIC enter
471                  *      in a deadlock situation.
472                  *
473                  *      By extension, avoid to allocate a free descriptor that
474                  *      belongs to the last set of free descriptors allocated
475                  *      to the same packet previously transmitted.
476                  */
477
478                 /*
479                  * The "last descriptor" of the previously sent packet, if any,
480                  * which used the last descriptor to allocate.
481                  */
482                 tx_end = sw_ring[tx_last].last_id;
483
484                 /*
485                  * The next descriptor following that "last descriptor" in the
486                  * ring.
487                  */
488                 tx_end = sw_ring[tx_end].next_id;
489
490                 /*
491                  * The "last descriptor" associated with that next descriptor.
492                  */
493                 tx_end = sw_ring[tx_end].last_id;
494
495                 /*
496                  * Check that this descriptor is free.
497                  */
498                 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
499                         if (nb_tx == 0)
500                                 return 0;
501                         goto end_of_tx;
502                 }
503
504                 /*
505                  * Set common flags of all TX Data Descriptors.
506                  *
507                  * The following bits must be set in all Data Descriptors:
508                  *   - E1000_ADVTXD_DTYP_DATA
509                  *   - E1000_ADVTXD_DCMD_DEXT
510                  *
511                  * The following bits must be set in the first Data Descriptor
512                  * and are ignored in the other ones:
513                  *   - E1000_ADVTXD_DCMD_IFCS
514                  *   - E1000_ADVTXD_MAC_1588
515                  *   - E1000_ADVTXD_DCMD_VLE
516                  *
517                  * The following bits must only be set in the last Data
518                  * Descriptor:
519                  *   - E1000_TXD_CMD_EOP
520                  *
521                  * The following bits can be set in any Data Descriptor, but
522                  * are only set in the last Data Descriptor:
523                  *   - E1000_TXD_CMD_RS
524                  */
525                 cmd_type_len = txq->txd_type |
526                         E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
527                 if (tx_ol_req & PKT_TX_TCP_SEG)
528                         pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
529                 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
530 #if defined(RTE_LIBRTE_IEEE1588)
531                 if (ol_flags & PKT_TX_IEEE1588_TMST)
532                         cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
533 #endif
534                 if (tx_ol_req) {
535                         /* Setup TX Advanced context descriptor if required */
536                         if (new_ctx) {
537                                 volatile struct e1000_adv_tx_context_desc *
538                                     ctx_txd;
539
540                                 ctx_txd = (volatile struct
541                                     e1000_adv_tx_context_desc *)
542                                     &txr[tx_id];
543
544                                 txn = &sw_ring[txe->next_id];
545                                 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
546
547                                 if (txe->mbuf != NULL) {
548                                         rte_pktmbuf_free_seg(txe->mbuf);
549                                         txe->mbuf = NULL;
550                                 }
551
552                                 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
553
554                                 txe->last_id = tx_last;
555                                 tx_id = txe->next_id;
556                                 txe = txn;
557                         }
558
559                         /* Setup the TX Advanced Data Descriptor */
560                         cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
561                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
562                         olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
563                 }
564
565                 m_seg = tx_pkt;
566                 do {
567                         txn = &sw_ring[txe->next_id];
568                         txd = &txr[tx_id];
569
570                         if (txe->mbuf != NULL)
571                                 rte_pktmbuf_free_seg(txe->mbuf);
572                         txe->mbuf = m_seg;
573
574                         /*
575                          * Set up transmit descriptor.
576                          */
577                         slen = (uint16_t) m_seg->data_len;
578                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
579                         txd->read.buffer_addr =
580                                 rte_cpu_to_le_64(buf_dma_addr);
581                         txd->read.cmd_type_len =
582                                 rte_cpu_to_le_32(cmd_type_len | slen);
583                         txd->read.olinfo_status =
584                                 rte_cpu_to_le_32(olinfo_status);
585                         txe->last_id = tx_last;
586                         tx_id = txe->next_id;
587                         txe = txn;
588                         m_seg = m_seg->next;
589                 } while (m_seg != NULL);
590
591                 /*
592                  * The last packet data descriptor needs End Of Packet (EOP)
593                  * and Report Status (RS).
594                  */
595                 txd->read.cmd_type_len |=
596                         rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
597         }
598  end_of_tx:
599         rte_wmb();
600
601         /*
602          * Set the Transmit Descriptor Tail (TDT).
603          */
604         E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
605         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
606                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
607                    (unsigned) tx_id, (unsigned) nb_tx);
608         txq->tx_tail = tx_id;
609
610         return nb_tx;
611 }
612
613 /*********************************************************************
614  *
615  *  TX prep functions
616  *
617  **********************************************************************/
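/*
 * Check TSO limits and supported offload flags, and let
 * rte_net_intel_cksum_prepare() fill in the pseudo-header checksums expected
 * by the hardware. On failure, rte_errno is set and the index of the
 * offending packet is returned.
 */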
618 uint16_t
619 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
620                 uint16_t nb_pkts)
621 {
622         int i, ret;
623         struct rte_mbuf *m;
624
625         for (i = 0; i < nb_pkts; i++) {
626                 m = tx_pkts[i];
627
628                 /* Check some limitations for TSO in hardware */
629                 if (m->ol_flags & PKT_TX_TCP_SEG)
630                         if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
631                                         (m->l2_len + m->l3_len + m->l4_len >
632                                         IGB_TSO_MAX_HDRLEN)) {
633                                 rte_errno = EINVAL;
634                                 return i;
635                         }
636
637                 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
638                         rte_errno = ENOTSUP;
639                         return i;
640                 }
641
642 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
643                 ret = rte_validate_tx_offload(m);
644                 if (ret != 0) {
645                         rte_errno = -ret;
646                         return i;
647                 }
648 #endif
649                 ret = rte_net_intel_cksum_prepare(m);
650                 if (ret != 0) {
651                         rte_errno = -ret;
652                         return i;
653                 }
654         }
655
656         return i;
657 }
658
659 /*********************************************************************
660  *
661  *  RX functions
662  *
663  **********************************************************************/
664 #define IGB_PACKET_TYPE_IPV4              0X01
665 #define IGB_PACKET_TYPE_IPV4_TCP          0X11
666 #define IGB_PACKET_TYPE_IPV4_UDP          0X21
667 #define IGB_PACKET_TYPE_IPV4_SCTP         0X41
668 #define IGB_PACKET_TYPE_IPV4_EXT          0X03
669 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP     0X43
670 #define IGB_PACKET_TYPE_IPV6              0X04
671 #define IGB_PACKET_TYPE_IPV6_TCP          0X14
672 #define IGB_PACKET_TYPE_IPV6_UDP          0X24
673 #define IGB_PACKET_TYPE_IPV6_EXT          0X0C
674 #define IGB_PACKET_TYPE_IPV6_EXT_TCP      0X1C
675 #define IGB_PACKET_TYPE_IPV6_EXT_UDP      0X2C
676 #define IGB_PACKET_TYPE_IPV4_IPV6         0X05
677 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP     0X15
678 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP     0X25
679 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
680 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
681 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
682 #define IGB_PACKET_TYPE_MAX               0X80
683 #define IGB_PACKET_TYPE_MASK              0X7F
684 #define IGB_PACKET_TYPE_SHIFT             0X04
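/*
 * Translate the packet-type information reported in the RX descriptor into
 * an mbuf packet_type value using the lookup table below; packets matched by
 * an EtherType (ETQF) filter are reported as RTE_PTYPE_UNKNOWN.
 */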
685 static inline uint32_t
686 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
687 {
688         static const uint32_t
689                 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
690                 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
691                         RTE_PTYPE_L3_IPV4,
692                 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
693                         RTE_PTYPE_L3_IPV4_EXT,
694                 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
695                         RTE_PTYPE_L3_IPV6,
696                 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
697                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
698                         RTE_PTYPE_INNER_L3_IPV6,
699                 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
700                         RTE_PTYPE_L3_IPV6_EXT,
701                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
702                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
703                         RTE_PTYPE_INNER_L3_IPV6_EXT,
704                 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
705                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
706                 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
707                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
708                 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
709                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
710                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
711                 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
712                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
713                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
714                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
715                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
716                 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
717                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
718                 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
719                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
720                 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
721                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
722                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
723                 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
724                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
725                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
726                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
727                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
728                 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
729                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
730                 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
731                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
732         };
733         if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
734                 return RTE_PTYPE_UNKNOWN;
735
736         pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
737
738         return ptype_table[pkt_info];
739 }
740
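/*
 * Derive RX offload flags from the hlen_type_rss word of the descriptor:
 * a valid RSS hash and, when IEEE 1588 support is compiled in, the PTP
 * packet flag taken from the EtherType-filter bits.
 */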
741 static inline uint64_t
742 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
743 {
744         uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ?  0 : PKT_RX_RSS_HASH;
745
746 #if defined(RTE_LIBRTE_IEEE1588)
747         static uint32_t ip_pkt_etqf_map[8] = {
748                 0, 0, 0, PKT_RX_IEEE1588_PTP,
749                 0, 0, 0, 0,
750         };
751
752         struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
753         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
754
755         /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
756         if (hw->mac.type == e1000_i210)
757                 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
758         else
759                 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
760 #else
761         RTE_SET_USED(rxq);
762 #endif
763
764         return pkt_flags;
765 }
766
767 static inline uint64_t
768 rx_desc_status_to_pkt_flags(uint32_t rx_status)
769 {
770         uint64_t pkt_flags;
771
772         /* Check if VLAN present */
773         pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
774                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
775
776 #if defined(RTE_LIBRTE_IEEE1588)
777         if (rx_status & E1000_RXD_STAT_TMST)
778                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
779 #endif
780         return pkt_flags;
781 }
782
783 static inline uint64_t
784 rx_desc_error_to_pkt_flags(uint32_t rx_status)
785 {
786         /*
787          * Bit 30: IPE, IPv4 checksum error
788          * Bit 29: L4I, L4 integrity error
789          */
790
791         static uint64_t error_to_pkt_flags_map[4] = {
792                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
793                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
794                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
795                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
796         };
797         return error_to_pkt_flags_map[(rx_status >>
798                 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
799 }
800
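/*
 * Receive function for the non-scattered case: each RX descriptor is
 * expected to carry a complete packet in a single mbuf.
 */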
801 uint16_t
802 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
803                uint16_t nb_pkts)
804 {
805         struct igb_rx_queue *rxq;
806         volatile union e1000_adv_rx_desc *rx_ring;
807         volatile union e1000_adv_rx_desc *rxdp;
808         struct igb_rx_entry *sw_ring;
809         struct igb_rx_entry *rxe;
810         struct rte_mbuf *rxm;
811         struct rte_mbuf *nmb;
812         union e1000_adv_rx_desc rxd;
813         uint64_t dma_addr;
814         uint32_t staterr;
815         uint32_t hlen_type_rss;
816         uint16_t pkt_len;
817         uint16_t rx_id;
818         uint16_t nb_rx;
819         uint16_t nb_hold;
820         uint64_t pkt_flags;
821
822         nb_rx = 0;
823         nb_hold = 0;
824         rxq = rx_queue;
825         rx_id = rxq->rx_tail;
826         rx_ring = rxq->rx_ring;
827         sw_ring = rxq->sw_ring;
828         while (nb_rx < nb_pkts) {
829                 /*
830                  * The order of operations here is important as the DD status
831                  * bit must not be read after any other descriptor fields.
832                  * rx_ring and rxdp are pointing to volatile data so the order
833                  * of accesses cannot be reordered by the compiler. If they were
834                  * not volatile, they could be reordered which could lead to
835                  * using invalid descriptor fields when read from rxd.
836                  */
837                 rxdp = &rx_ring[rx_id];
838                 staterr = rxdp->wb.upper.status_error;
839                 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
840                         break;
841                 rxd = *rxdp;
842
843                 /*
844                  * End of packet.
845                  *
846                  * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
847                  * likely to be invalid and to be dropped by the various
848                  * validation checks performed by the network stack.
849                  *
850                  * Allocate a new mbuf to replenish the RX ring descriptor.
851                  * If the allocation fails:
852                  *    - arrange for that RX descriptor to be the first one
853                  *      being parsed the next time the receive function is
854                  *      invoked [on the same queue].
855                  *
856                  *    - Stop parsing the RX ring and return immediately.
857                  *
858                  * This policy does not drop the packet received in the RX
859                  * descriptor for which the allocation of a new mbuf failed.
860                  * Thus, it allows that packet to be later retrieved once
861                  * mbufs have been freed in the meantime.
862                  * As a side effect, holding RX descriptors instead of
863                  * systematically giving them back to the NIC may lead to
864                  * RX ring exhaustion situations.
865                  * However, the NIC can gracefully prevent such situations
866                  * from happening by sending specific "back-pressure" flow
867                  * control frames to its peer(s).
868                  */
869                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
870                            "staterr=0x%x pkt_len=%u",
871                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
872                            (unsigned) rx_id, (unsigned) staterr,
873                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
874
875                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
876                 if (nmb == NULL) {
877                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
878                                    "queue_id=%u", (unsigned) rxq->port_id,
879                                    (unsigned) rxq->queue_id);
880                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
881                         break;
882                 }
883
884                 nb_hold++;
885                 rxe = &sw_ring[rx_id];
886                 rx_id++;
887                 if (rx_id == rxq->nb_rx_desc)
888                         rx_id = 0;
889
890                 /* Prefetch next mbuf while processing current one. */
891                 rte_igb_prefetch(sw_ring[rx_id].mbuf);
892
893                 /*
894                  * When next RX descriptor is on a cache-line boundary,
895                  * prefetch the next 4 RX descriptors and the next 8 pointers
896                  * to mbufs.
897                  */
898                 if ((rx_id & 0x3) == 0) {
899                         rte_igb_prefetch(&rx_ring[rx_id]);
900                         rte_igb_prefetch(&sw_ring[rx_id]);
901                 }
902
903                 rxm = rxe->mbuf;
904                 rxe->mbuf = nmb;
905                 dma_addr =
906                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
907                 rxdp->read.hdr_addr = 0;
908                 rxdp->read.pkt_addr = dma_addr;
909
910                 /*
911                  * Initialize the returned mbuf.
912                  * 1) setup generic mbuf fields:
913                  *    - number of segments,
914                  *    - next segment,
915                  *    - packet length,
916                  *    - RX port identifier.
917                  * 2) integrate hardware offload data, if any:
918                  *    - RSS flag & hash,
919                  *    - IP checksum flag,
920                  *    - VLAN TCI, if any,
921                  *    - error flags.
922                  */
923                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
924                                       rxq->crc_len);
925                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
926                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
927                 rxm->nb_segs = 1;
928                 rxm->next = NULL;
929                 rxm->pkt_len = pkt_len;
930                 rxm->data_len = pkt_len;
931                 rxm->port = rxq->port_id;
932
933                 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
934                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
935
936                 /*
937                  * The vlan_tci field is only valid when PKT_RX_VLAN is
938                  * set in the pkt_flags field and must be in CPU byte order.
939                  */
940                 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
941                                 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
942                         rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
943                 } else {
944                         rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
945                 }
946                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
947                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
948                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
949                 rxm->ol_flags = pkt_flags;
950                 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
951                                                 lo_dword.hs_rss.pkt_info);
952
953                 /*
954                  * Store the mbuf address into the next entry of the array
955                  * of returned packets.
956                  */
957                 rx_pkts[nb_rx++] = rxm;
958         }
959         rxq->rx_tail = rx_id;
960
961         /*
962          * If the number of free RX descriptors is greater than the RX free
963          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
964          * register.
965          * Update the RDT with the value of the last processed RX descriptor
966          * minus 1, to guarantee that the RDT register is never equal to the
967          * RDH register, which creates a "full" ring situation from the
968          * hardware point of view...
969          */
970         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
971         if (nb_hold > rxq->rx_free_thresh) {
972                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
973                            "nb_hold=%u nb_rx=%u",
974                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
975                            (unsigned) rx_id, (unsigned) nb_hold,
976                            (unsigned) nb_rx);
977                 rx_id = (uint16_t) ((rx_id == 0) ?
978                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
979                 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
980                 nb_hold = 0;
981         }
982         rxq->nb_rx_hold = nb_hold;
983         return nb_rx;
984 }
985
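/*
 * Receive function for the scattered case: descriptors are chained into a
 * multi-segment mbuf until a descriptor with the EOP bit set completes the
 * packet.
 */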
986 uint16_t
987 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
988                          uint16_t nb_pkts)
989 {
990         struct igb_rx_queue *rxq;
991         volatile union e1000_adv_rx_desc *rx_ring;
992         volatile union e1000_adv_rx_desc *rxdp;
993         struct igb_rx_entry *sw_ring;
994         struct igb_rx_entry *rxe;
995         struct rte_mbuf *first_seg;
996         struct rte_mbuf *last_seg;
997         struct rte_mbuf *rxm;
998         struct rte_mbuf *nmb;
999         union e1000_adv_rx_desc rxd;
1000         uint64_t dma; /* Physical address of mbuf data buffer */
1001         uint32_t staterr;
1002         uint32_t hlen_type_rss;
1003         uint16_t rx_id;
1004         uint16_t nb_rx;
1005         uint16_t nb_hold;
1006         uint16_t data_len;
1007         uint64_t pkt_flags;
1008
1009         nb_rx = 0;
1010         nb_hold = 0;
1011         rxq = rx_queue;
1012         rx_id = rxq->rx_tail;
1013         rx_ring = rxq->rx_ring;
1014         sw_ring = rxq->sw_ring;
1015
1016         /*
1017          * Retrieve RX context of current packet, if any.
1018          */
1019         first_seg = rxq->pkt_first_seg;
1020         last_seg = rxq->pkt_last_seg;
1021
1022         while (nb_rx < nb_pkts) {
1023         next_desc:
1024                 /*
1025                  * The order of operations here is important as the DD status
1026                  * bit must not be read after any other descriptor fields.
1027                  * rx_ring and rxdp are pointing to volatile data so the order
1028                  * of accesses cannot be reordered by the compiler. If they were
1029                  * not volatile, they could be reordered which could lead to
1030                  * using invalid descriptor fields when read from rxd.
1031                  */
1032                 rxdp = &rx_ring[rx_id];
1033                 staterr = rxdp->wb.upper.status_error;
1034                 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1035                         break;
1036                 rxd = *rxdp;
1037
1038                 /*
1039                  * Descriptor done.
1040                  *
1041                  * Allocate a new mbuf to replenish the RX ring descriptor.
1042                  * If the allocation fails:
1043                  *    - arrange for that RX descriptor to be the first one
1044                  *      being parsed the next time the receive function is
1045                  *      invoked [on the same queue].
1046                  *
1047                  *    - Stop parsing the RX ring and return immediately.
1048                  *
1049                  * This policy does not drop the packet received in the RX
1050                  * descriptor for which the allocation of a new mbuf failed.
1051                  * Thus, it allows that packet to be later retrieved once
1052                  * mbufs have been freed in the meantime.
1053                  * As a side effect, holding RX descriptors instead of
1054                  * systematically giving them back to the NIC may lead to
1055                  * RX ring exhaustion situations.
1056                  * However, the NIC can gracefully prevent such situations
1057                  * from happening by sending specific "back-pressure" flow
1058                  * control frames to its peer(s).
1059                  */
1060                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1061                            "staterr=0x%x data_len=%u",
1062                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1063                            (unsigned) rx_id, (unsigned) staterr,
1064                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1065
1066                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1067                 if (nmb == NULL) {
1068                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1069                                    "queue_id=%u", (unsigned) rxq->port_id,
1070                                    (unsigned) rxq->queue_id);
1071                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1072                         break;
1073                 }
1074
1075                 nb_hold++;
1076                 rxe = &sw_ring[rx_id];
1077                 rx_id++;
1078                 if (rx_id == rxq->nb_rx_desc)
1079                         rx_id = 0;
1080
1081                 /* Prefetch next mbuf while processing current one. */
1082                 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1083
1084                 /*
1085                  * When next RX descriptor is on a cache-line boundary,
1086                  * prefetch the next 4 RX descriptors and the next 8 pointers
1087                  * to mbufs.
1088                  */
1089                 if ((rx_id & 0x3) == 0) {
1090                         rte_igb_prefetch(&rx_ring[rx_id]);
1091                         rte_igb_prefetch(&sw_ring[rx_id]);
1092                 }
1093
1094                 /*
1095                  * Update RX descriptor with the physical address of the new
1096                  * data buffer of the new allocated mbuf.
1097                  */
1098                 rxm = rxe->mbuf;
1099                 rxe->mbuf = nmb;
1100                 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1101                 rxdp->read.pkt_addr = dma;
1102                 rxdp->read.hdr_addr = 0;
1103
1104                 /*
1105                  * Set data length & data buffer address of mbuf.
1106                  */
1107                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1108                 rxm->data_len = data_len;
1109                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1110
1111                 /*
1112                  * If this is the first buffer of the received packet,
1113                  * set the pointer to the first mbuf of the packet and
1114                  * initialize its context.
1115                  * Otherwise, update the total length and the number of segments
1116                  * of the current scattered packet, and update the pointer to
1117                  * the last mbuf of the current packet.
1118                  */
1119                 if (first_seg == NULL) {
1120                         first_seg = rxm;
1121                         first_seg->pkt_len = data_len;
1122                         first_seg->nb_segs = 1;
1123                 } else {
1124                         first_seg->pkt_len += data_len;
1125                         first_seg->nb_segs++;
1126                         last_seg->next = rxm;
1127                 }
1128
1129                 /*
1130                  * If this is not the last buffer of the received packet,
1131                  * update the pointer to the last mbuf of the current scattered
1132                  * packet and continue to parse the RX ring.
1133                  */
1134                 if (! (staterr & E1000_RXD_STAT_EOP)) {
1135                         last_seg = rxm;
1136                         goto next_desc;
1137                 }
1138
1139                 /*
1140                  * This is the last buffer of the received packet.
1141                  * If the CRC is not stripped by the hardware:
1142                  *   - Subtract the CRC length from the total packet length.
1143                  *   - If the last buffer only contains the whole CRC or a part
1144                  *     of it, free the mbuf associated to the last buffer.
1145                  *     If part of the CRC is also contained in the previous
1146                  *     mbuf, subtract the length of that CRC part from the
1147                  *     data length of the previous mbuf.
1148                  */
1149                 rxm->next = NULL;
1150                 if (unlikely(rxq->crc_len > 0)) {
1151                         first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1152                         if (data_len <= RTE_ETHER_CRC_LEN) {
1153                                 rte_pktmbuf_free_seg(rxm);
1154                                 first_seg->nb_segs--;
1155                                 last_seg->data_len = (uint16_t)
1156                                         (last_seg->data_len -
1157                                          (RTE_ETHER_CRC_LEN - data_len));
1158                                 last_seg->next = NULL;
1159                         } else
1160                                 rxm->data_len = (uint16_t)
1161                                         (data_len - RTE_ETHER_CRC_LEN);
1162                 }
1163
1164                 /*
1165                  * Initialize the first mbuf of the returned packet:
1166                  *    - RX port identifier,
1167                  *    - hardware offload data, if any:
1168                  *      - RSS flag & hash,
1169                  *      - IP checksum flag,
1170                  *      - VLAN TCI, if any,
1171                  *      - error flags.
1172                  */
1173                 first_seg->port = rxq->port_id;
1174                 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1175
1176                 /*
1177                  * The vlan_tci field is only valid when PKT_RX_VLAN is
1178                  * set in the pkt_flags field and must be in CPU byte order.
1179                  */
1180                 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1181                                 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1182                         first_seg->vlan_tci =
1183                                 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1184                 } else {
1185                         first_seg->vlan_tci =
1186                                 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1187                 }
1188                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1189                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1190                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1191                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1192                 first_seg->ol_flags = pkt_flags;
1193                 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1194                                         lower.lo_dword.hs_rss.pkt_info);
1195
1196                 /* Prefetch data of first segment, if configured to do so. */
1197                 rte_packet_prefetch((char *)first_seg->buf_addr +
1198                         first_seg->data_off);
1199
1200                 /*
1201                  * Store the mbuf address into the next entry of the array
1202                  * of returned packets.
1203                  */
1204                 rx_pkts[nb_rx++] = first_seg;
1205
1206                 /*
1207                  * Setup receipt context for a new packet.
1208                  */
1209                 first_seg = NULL;
1210         }
1211
1212         /*
1213          * Record index of the next RX descriptor to probe.
1214          */
1215         rxq->rx_tail = rx_id;
1216
1217         /*
1218          * Save receive context.
1219          */
1220         rxq->pkt_first_seg = first_seg;
1221         rxq->pkt_last_seg = last_seg;
1222
1223         /*
1224          * If the number of free RX descriptors is greater than the RX free
1225          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1226          * register.
1227          * Update the RDT with the value of the last processed RX descriptor
1228          * minus 1, to guarantee that the RDT register is never equal to the
1229          * RDH register, which creates a "full" ring situation from the
1230          * hardware point of view...
1231          */
1232         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1233         if (nb_hold > rxq->rx_free_thresh) {
1234                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1235                            "nb_hold=%u nb_rx=%u",
1236                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1237                            (unsigned) rx_id, (unsigned) nb_hold,
1238                            (unsigned) nb_rx);
1239                 rx_id = (uint16_t) ((rx_id == 0) ?
1240                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1241                 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1242                 nb_hold = 0;
1243         }
1244         rxq->nb_rx_hold = nb_hold;
1245         return nb_rx;
1246 }
1247
1248 /*
1249  * Maximum number of Ring Descriptors.
1250  *
1251  * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1252  * descriptors should meet the following condition:
1253  *      (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1254  */
1255
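/* Free any mbufs still attached to the entries of the TX software ring. */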
1256 static void
1257 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1258 {
1259         unsigned i;
1260
1261         if (txq->sw_ring != NULL) {
1262                 for (i = 0; i < txq->nb_tx_desc; i++) {
1263                         if (txq->sw_ring[i].mbuf != NULL) {
1264                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1265                                 txq->sw_ring[i].mbuf = NULL;
1266                         }
1267                 }
1268         }
1269 }
1270
1271 static void
1272 igb_tx_queue_release(struct igb_tx_queue *txq)
1273 {
1274         if (txq != NULL) {
1275                 igb_tx_queue_release_mbufs(txq);
1276                 rte_free(txq->sw_ring);
1277                 rte_free(txq);
1278         }
1279 }
1280
1281 void
1282 eth_igb_tx_queue_release(void *txq)
1283 {
1284         igb_tx_queue_release(txq);
1285 }
1286
1287 static int
1288 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1289 {
1290         struct igb_tx_entry *sw_ring;
1291         volatile union e1000_adv_tx_desc *txr;
1292         uint16_t tx_first; /* First segment analyzed. */
1293         uint16_t tx_id;    /* Current segment being processed. */
1294         uint16_t tx_last;  /* Last segment in the current packet. */
1295         uint16_t tx_next;  /* First segment of the next packet. */
1296         int count;
1297
1298         if (txq != NULL) {
1299                 count = 0;
1300                 sw_ring = txq->sw_ring;
1301                 txr = txq->tx_ring;
1302
1303                 /*
1304                  * tx_tail is the last sent packet on the sw_ring. Go to the end
1305                  * of that packet (the last segment in the packet chain) and
1306                  * then the next segment will be the start of the oldest segment
1307                  * in the sw_ring. This is the first packet that will be
1308                  * attempted to be freed.
1309                  */
1310
1311                 /* Get last segment in most recently added packet. */
1312                 tx_first = sw_ring[txq->tx_tail].last_id;
1313
1314                 /* Get the next segment, which is the oldest segment in ring. */
1315                 tx_first = sw_ring[tx_first].next_id;
1316
1317                 /* Set the current index to the first. */
1318                 tx_id = tx_first;
1319
1320                 /*
1321                  * Loop through each packet. For each packet, verify that an
1322                  * mbuf exists and that the last segment is free. If so, free
1323                  * it and move on.
1324                  */
1325                 while (1) {
1326                         tx_last = sw_ring[tx_id].last_id;
1327
1328                         if (sw_ring[tx_last].mbuf) {
1329                                 if (txr[tx_last].wb.status &
1330                                                 E1000_TXD_STAT_DD) {
1331                                         /*
1332                                          * Increment the number of packets
1333                                          * freed.
1334                                          */
1335                                         count++;
1336
1337                                         /* Get the start of the next packet. */
1338                                         tx_next = sw_ring[tx_last].next_id;
1339
1340                                         /*
1341                                          * Loop through all segments in a
1342                                          * packet.
1343                                          */
1344                                         do {
1345                                                 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1346                                                 sw_ring[tx_id].mbuf = NULL;
1347                                                 sw_ring[tx_id].last_id = tx_id;
1348
1349                                                 /* Move to next segment. */
1350                                                 tx_id = sw_ring[tx_id].next_id;
1351
1352                                         } while (tx_id != tx_next);
1353
1354                                         if (unlikely(count == (int)free_cnt))
1355                                                 break;
1356                                 } else
1357                                         /*
1358                                          * mbuf still in use, nothing left to
1359                                          * free.
1360                                          */
1361                                         break;
1362                         } else {
1363                                 /*
1364                                  * There are multiple reasons to be here:
1365                                  * 1) All the packets on the ring have been
1366                                  *    freed - tx_id is equal to tx_first
1367                                  *    and some packets have been freed.
1368                                  *    - Done, exit
1369                                  * 2) The interface has not sent a ring's worth of
1370                                  *    packets yet, so the segment after tail is
1371                                  *    still empty. Or a previous call to this
1372                                  *    function freed some of the segments but
1373                                  *    not all, so there is a hole in the list.
1374                                  *    Hopefully this is a rare case.
1375                                  *    - Walk the list and find the next mbuf. If
1376                                  *      there isn't one, then done.
1377                                  */
1378                                 if (likely((tx_id == tx_first) && (count != 0)))
1379                                         break;
1380
1381                                 /*
1382                                  * Walk the list and find the next mbuf, if any.
1383                                  */
1384                                 do {
1385                                         /* Move to next segment. */
1386                                         tx_id = sw_ring[tx_id].next_id;
1387
1388                                         if (sw_ring[tx_id].mbuf)
1389                                                 break;
1390
1391                                 } while (tx_id != tx_first);
1392
1393                                 /*
1394                                  * Determine why previous loop bailed. If there
1395                                  * is not an mbuf, done.
1396                                  */
1397                                 if (sw_ring[tx_id].mbuf == NULL)
1398                                         break;
1399                         }
1400                 }
1401         } else
1402                 count = -ENODEV;
1403
1404         return count;
1405 }
1406
1407 int
1408 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1409 {
1410         return igb_tx_done_cleanup(txq, free_cnt);
1411 }
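/*
 * Illustrative usage sketch, not part of the driver: an application reaches
 * igb_tx_done_cleanup() through the generic ethdev call below. The port and
 * queue identifiers, and the free count of 32, are assumptions for the
 * example.
 */
static __rte_unused int
example_tx_done_cleanup(uint16_t port_id, uint16_t queue_id)
{
        /* Free up to 32 transmitted packets back to their mempool. */
        return rte_eth_tx_done_cleanup(port_id, queue_id, 32);
}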
1412
1413 static void
1414 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1415 {
1416         txq->tx_head = 0;
1417         txq->tx_tail = 0;
1418         txq->ctx_curr = 0;
1419         memset((void*)&txq->ctx_cache, 0,
1420                 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1421 }
1422
1423 static void
1424 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1425 {
1426         static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1427         struct igb_tx_entry *txe = txq->sw_ring;
1428         uint16_t i, prev;
1429         struct e1000_hw *hw;
1430
1431         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1432         /* Zero out HW ring memory */
1433         for (i = 0; i < txq->nb_tx_desc; i++) {
1434                 txq->tx_ring[i] = zeroed_desc;
1435         }
1436
1437         /* Initialize ring entries */
1438         prev = (uint16_t)(txq->nb_tx_desc - 1);
1439         for (i = 0; i < txq->nb_tx_desc; i++) {
1440                 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1441
1442                 txd->wb.status = E1000_TXD_STAT_DD;
1443                 txe[i].mbuf = NULL;
1444                 txe[i].last_id = i;
1445                 txe[prev].next_id = i;
1446                 prev = i;
1447         }
1448
1449         txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1450         /* 82575 specific, each tx queue will use 2 hw contexts */
1451         if (hw->mac.type == e1000_82575)
1452                 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1453
1454         igb_reset_tx_queue_stat(txq);
1455 }
1456
1457 uint64_t
1458 igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
1459 {
1460         uint64_t tx_offload_capa;
1461
1462         RTE_SET_USED(dev);
1463         tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
1464                           DEV_TX_OFFLOAD_IPV4_CKSUM  |
1465                           DEV_TX_OFFLOAD_UDP_CKSUM   |
1466                           DEV_TX_OFFLOAD_TCP_CKSUM   |
1467                           DEV_TX_OFFLOAD_SCTP_CKSUM  |
1468                           DEV_TX_OFFLOAD_TCP_TSO     |
1469                           DEV_TX_OFFLOAD_MULTI_SEGS;
1470
1471         return tx_offload_capa;
1472 }
1473
1474 uint64_t
1475 igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
1476 {
1477         uint64_t tx_queue_offload_capa;
1478
1479         tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
1480
1481         return tx_queue_offload_capa;
1482 }
1483
1484 int
1485 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1486                          uint16_t queue_idx,
1487                          uint16_t nb_desc,
1488                          unsigned int socket_id,
1489                          const struct rte_eth_txconf *tx_conf)
1490 {
1491         const struct rte_memzone *tz;
1492         struct igb_tx_queue *txq;
1493         struct e1000_hw     *hw;
1494         uint32_t size;
1495         uint64_t offloads;
1496
1497         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1498
1499         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1500
1501         /*
1502          * Validate number of transmit descriptors.
1503          * It must not exceed hardware maximum, and must be multiple
1504          * of E1000_ALIGN.
1505          */
1506         if (nb_desc % IGB_TXD_ALIGN != 0 ||
1507                         (nb_desc > E1000_MAX_RING_DESC) ||
1508                         (nb_desc < E1000_MIN_RING_DESC)) {
1509                 return -EINVAL;
1510         }
1511
1512         /*
1513          * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1514          * driver.
1515          */
1516         if (tx_conf->tx_free_thresh != 0)
1517                 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1518                              "used for the 1G driver.");
1519         if (tx_conf->tx_rs_thresh != 0)
1520                 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1521                              "used for the 1G driver.");
1522         if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1523                 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1524                              "consider setting the TX WTHRESH value to 4, 8, "
1525                              "or 16.");
1526
1527         /* Free memory prior to re-allocation if needed */
1528         if (dev->data->tx_queues[queue_idx] != NULL) {
1529                 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1530                 dev->data->tx_queues[queue_idx] = NULL;
1531         }
1532
1533         /* First allocate the tx queue data structure */
1534         txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1535                                                         RTE_CACHE_LINE_SIZE);
1536         if (txq == NULL)
1537                 return -ENOMEM;
1538
1539         /*
1540          * Allocate TX ring hardware descriptors. A memzone large enough to
1541          * handle the maximum ring size is allocated in order to allow for
1542          * resizing in later calls to the queue setup function.
1543          */
1544         size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1545         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1546                                       E1000_ALIGN, socket_id);
1547         if (tz == NULL) {
1548                 igb_tx_queue_release(txq);
1549                 return -ENOMEM;
1550         }
1551
1552         txq->nb_tx_desc = nb_desc;
1553         txq->pthresh = tx_conf->tx_thresh.pthresh;
1554         txq->hthresh = tx_conf->tx_thresh.hthresh;
1555         txq->wthresh = tx_conf->tx_thresh.wthresh;
1556         if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1557                 txq->wthresh = 1;
1558         txq->queue_id = queue_idx;
1559         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1560                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1561         txq->port_id = dev->data->port_id;
1562
1563         txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1564         txq->tx_ring_phys_addr = tz->iova;
1565
1566         txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1567         /* Allocate software ring */
1568         txq->sw_ring = rte_zmalloc("txq->sw_ring",
1569                                    sizeof(struct igb_tx_entry) * nb_desc,
1570                                    RTE_CACHE_LINE_SIZE);
1571         if (txq->sw_ring == NULL) {
1572                 igb_tx_queue_release(txq);
1573                 return -ENOMEM;
1574         }
1575         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1576                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1577
1578         igb_reset_tx_queue(txq, dev);
1579         dev->tx_pkt_burst = eth_igb_xmit_pkts;
1580         dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1581         dev->data->tx_queues[queue_idx] = txq;
1582         txq->offloads = offloads;
1583
1584         return 0;
1585 }
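/*
 * Illustrative usage sketch, not part of the driver: how an application
 * would typically reach eth_igb_tx_queue_setup() through the ethdev API.
 * The descriptor count of 512 is an assumption chosen to satisfy the
 * IGB_TXD_ALIGN and E1000_MIN/MAX_RING_DESC checks above.
 */
static __rte_unused int
example_setup_tx_queue(uint16_t port_id, uint16_t queue_id)
{
        /* A NULL txconf means "use the defaults reported by the PMD". */
        return rte_eth_tx_queue_setup(port_id, queue_id, 512,
                                      rte_socket_id(), NULL);
}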
1586
1587 static void
1588 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1589 {
1590         unsigned i;
1591
1592         if (rxq->sw_ring != NULL) {
1593                 for (i = 0; i < rxq->nb_rx_desc; i++) {
1594                         if (rxq->sw_ring[i].mbuf != NULL) {
1595                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1596                                 rxq->sw_ring[i].mbuf = NULL;
1597                         }
1598                 }
1599         }
1600 }
1601
1602 static void
1603 igb_rx_queue_release(struct igb_rx_queue *rxq)
1604 {
1605         if (rxq != NULL) {
1606                 igb_rx_queue_release_mbufs(rxq);
1607                 rte_free(rxq->sw_ring);
1608                 rte_free(rxq);
1609         }
1610 }
1611
1612 void
1613 eth_igb_rx_queue_release(void *rxq)
1614 {
1615         igb_rx_queue_release(rxq);
1616 }
1617
1618 static void
1619 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1620 {
1621         static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1622         unsigned i;
1623
1624         /* Zero out HW ring memory */
1625         for (i = 0; i < rxq->nb_rx_desc; i++) {
1626                 rxq->rx_ring[i] = zeroed_desc;
1627         }
1628
1629         rxq->rx_tail = 0;
1630         rxq->pkt_first_seg = NULL;
1631         rxq->pkt_last_seg = NULL;
1632 }
1633
1634 uint64_t
1635 igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1636 {
1637         uint64_t rx_offload_capa;
1638
1639         RTE_SET_USED(dev);
1640         rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP  |
1641                           DEV_RX_OFFLOAD_VLAN_FILTER |
1642                           DEV_RX_OFFLOAD_IPV4_CKSUM  |
1643                           DEV_RX_OFFLOAD_UDP_CKSUM   |
1644                           DEV_RX_OFFLOAD_TCP_CKSUM   |
1645                           DEV_RX_OFFLOAD_JUMBO_FRAME |
1646                           DEV_RX_OFFLOAD_KEEP_CRC    |
1647                           DEV_RX_OFFLOAD_SCATTER;
1648
1649         return rx_offload_capa;
1650 }
1651
1652 uint64_t
1653 igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1654 {
1655         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1656         uint64_t rx_queue_offload_capa;
1657
1658         switch (hw->mac.type) {
1659         case e1000_vfadapt_i350:
1660                 /*
1661                  * As only one Rx queue can be used, let the per-queue offloading
1662                  * capability be the same as the per-port offloading capability
1663                  * for convenience.
1664                  */
1665                 rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1666                 break;
1667         default:
1668                 rx_queue_offload_capa = 0;
1669         }
1670         return rx_queue_offload_capa;
1671 }
1672
1673 int
1674 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1675                          uint16_t queue_idx,
1676                          uint16_t nb_desc,
1677                          unsigned int socket_id,
1678                          const struct rte_eth_rxconf *rx_conf,
1679                          struct rte_mempool *mp)
1680 {
1681         const struct rte_memzone *rz;
1682         struct igb_rx_queue *rxq;
1683         struct e1000_hw     *hw;
1684         unsigned int size;
1685         uint64_t offloads;
1686
1687         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1688
1689         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1690
1691         /*
1692          * Validate number of receive descriptors.
1693          * It must not exceed hardware maximum, and must be multiple
1694          * of E1000_ALIGN.
1695          */
1696         if (nb_desc % IGB_RXD_ALIGN != 0 ||
1697                         (nb_desc > E1000_MAX_RING_DESC) ||
1698                         (nb_desc < E1000_MIN_RING_DESC)) {
1699                 return -EINVAL;
1700         }
1701
1702         /* Free memory prior to re-allocation if needed */
1703         if (dev->data->rx_queues[queue_idx] != NULL) {
1704                 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1705                 dev->data->rx_queues[queue_idx] = NULL;
1706         }
1707
1708         /* First allocate the RX queue data structure. */
1709         rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1710                           RTE_CACHE_LINE_SIZE);
1711         if (rxq == NULL)
1712                 return -ENOMEM;
1713         rxq->offloads = offloads;
1714         rxq->mb_pool = mp;
1715         rxq->nb_rx_desc = nb_desc;
1716         rxq->pthresh = rx_conf->rx_thresh.pthresh;
1717         rxq->hthresh = rx_conf->rx_thresh.hthresh;
1718         rxq->wthresh = rx_conf->rx_thresh.wthresh;
1719         if (rxq->wthresh > 0 &&
1720             (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1721                 rxq->wthresh = 1;
1722         rxq->drop_en = rx_conf->rx_drop_en;
1723         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1724         rxq->queue_id = queue_idx;
1725         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1726                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1727         rxq->port_id = dev->data->port_id;
1728         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1729                 rxq->crc_len = RTE_ETHER_CRC_LEN;
1730         else
1731                 rxq->crc_len = 0;
1732
1733         /*
1734          *  Allocate RX ring hardware descriptors. A memzone large enough to
1735          *  handle the maximum ring size is allocated in order to allow for
1736          *  resizing in later calls to the queue setup function.
1737          */
1738         size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1739         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1740                                       E1000_ALIGN, socket_id);
1741         if (rz == NULL) {
1742                 igb_rx_queue_release(rxq);
1743                 return -ENOMEM;
1744         }
1745         rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1746         rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1747         rxq->rx_ring_phys_addr = rz->iova;
1748         rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1749
1750         /* Allocate software ring. */
1751         rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1752                                    sizeof(struct igb_rx_entry) * nb_desc,
1753                                    RTE_CACHE_LINE_SIZE);
1754         if (rxq->sw_ring == NULL) {
1755                 igb_rx_queue_release(rxq);
1756                 return -ENOMEM;
1757         }
1758         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1759                      rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1760
1761         dev->data->rx_queues[queue_idx] = rxq;
1762         igb_reset_rx_queue(rxq);
1763
1764         return 0;
1765 }
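/*
 * Illustrative usage sketch, not part of the driver: an application creates
 * an mbuf pool and reaches eth_igb_rx_queue_setup() through the ethdev API.
 * The pool name, sizes and descriptor count are assumptions for the example.
 */
static __rte_unused int
example_setup_rx_queue(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mempool *mb_pool;

        mb_pool = rte_pktmbuf_pool_create("example_rx_pool", 4096, 256, 0,
                                          RTE_MBUF_DEFAULT_BUF_SIZE,
                                          rte_socket_id());
        if (mb_pool == NULL)
                return -ENOMEM;

        /* A NULL rxconf means "use the defaults reported by the PMD". */
        return rte_eth_rx_queue_setup(port_id, queue_id, 512,
                                      rte_socket_id(), NULL, mb_pool);
}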
1766
1767 uint32_t
1768 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1769 {
1770 #define IGB_RXQ_SCAN_INTERVAL 4
1771         volatile union e1000_adv_rx_desc *rxdp;
1772         struct igb_rx_queue *rxq;
1773         uint32_t desc = 0;
1774
1775         rxq = dev->data->rx_queues[rx_queue_id];
1776         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1777
1778         while ((desc < rxq->nb_rx_desc) &&
1779                 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1780                 desc += IGB_RXQ_SCAN_INTERVAL;
1781                 rxdp += IGB_RXQ_SCAN_INTERVAL;
1782                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1783                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
1784                                 desc - rxq->nb_rx_desc]);
1785         }
1786
1787         return desc;
1788 }
1789
1790 int
1791 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1792 {
1793         volatile union e1000_adv_rx_desc *rxdp;
1794         struct igb_rx_queue *rxq = rx_queue;
1795         uint32_t desc;
1796
1797         if (unlikely(offset >= rxq->nb_rx_desc))
1798                 return 0;
1799         desc = rxq->rx_tail + offset;
1800         if (desc >= rxq->nb_rx_desc)
1801                 desc -= rxq->nb_rx_desc;
1802
1803         rxdp = &rxq->rx_ring[desc];
1804         return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1805 }
1806
1807 int
1808 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1809 {
1810         struct igb_rx_queue *rxq = rx_queue;
1811         volatile uint32_t *status;
1812         uint32_t desc;
1813
1814         if (unlikely(offset >= rxq->nb_rx_desc))
1815                 return -EINVAL;
1816
1817         if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1818                 return RTE_ETH_RX_DESC_UNAVAIL;
1819
1820         desc = rxq->rx_tail + offset;
1821         if (desc >= rxq->nb_rx_desc)
1822                 desc -= rxq->nb_rx_desc;
1823
1824         status = &rxq->rx_ring[desc].wb.upper.status_error;
1825         if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1826                 return RTE_ETH_RX_DESC_DONE;
1827
1828         return RTE_ETH_RX_DESC_AVAIL;
1829 }
1830
1831 int
1832 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1833 {
1834         struct igb_tx_queue *txq = tx_queue;
1835         volatile uint32_t *status;
1836         uint32_t desc;
1837
1838         if (unlikely(offset >= txq->nb_tx_desc))
1839                 return -EINVAL;
1840
1841         desc = txq->tx_tail + offset;
1842         if (desc >= txq->nb_tx_desc)
1843                 desc -= txq->nb_tx_desc;
1844
1845         status = &txq->tx_ring[desc].wb.status;
1846         if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1847                 return RTE_ETH_TX_DESC_DONE;
1848
1849         return RTE_ETH_TX_DESC_FULL;
1850 }
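/*
 * Illustrative usage sketch, not part of the driver: count how many of the
 * next 'limit' RX descriptors are already completed, using the generic
 * ethdev wrapper that ends up in eth_igb_rx_descriptor_status() above.
 */
static __rte_unused uint16_t
example_count_done_rx_desc(uint16_t port_id, uint16_t queue_id, uint16_t limit)
{
        uint16_t offset;

        for (offset = 0; offset < limit; offset++) {
                if (rte_eth_rx_descriptor_status(port_id, queue_id, offset) !=
                                RTE_ETH_RX_DESC_DONE)
                        break;
        }
        return offset;
}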
1851
1852 void
1853 igb_dev_clear_queues(struct rte_eth_dev *dev)
1854 {
1855         uint16_t i;
1856         struct igb_tx_queue *txq;
1857         struct igb_rx_queue *rxq;
1858
1859         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1860                 txq = dev->data->tx_queues[i];
1861                 if (txq != NULL) {
1862                         igb_tx_queue_release_mbufs(txq);
1863                         igb_reset_tx_queue(txq, dev);
1864                 }
1865         }
1866
1867         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1868                 rxq = dev->data->rx_queues[i];
1869                 if (rxq != NULL) {
1870                         igb_rx_queue_release_mbufs(rxq);
1871                         igb_reset_rx_queue(rxq);
1872                 }
1873         }
1874 }
1875
1876 void
1877 igb_dev_free_queues(struct rte_eth_dev *dev)
1878 {
1879         uint16_t i;
1880
1881         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1882                 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1883                 dev->data->rx_queues[i] = NULL;
1884         }
1885         dev->data->nb_rx_queues = 0;
1886
1887         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1888                 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1889                 dev->data->tx_queues[i] = NULL;
1890         }
1891         dev->data->nb_tx_queues = 0;
1892 }
1893
1894 /**
1895  * Receive Side Scaling (RSS).
1896  * See section 7.1.1.7 in the following document:
1897  *     "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1898  *
1899  * Principles:
1900  * The source and destination IP addresses of the IP header and the source and
1901  * destination ports of TCP/UDP headers, if any, of received packets are hashed
1902  * against a configurable random key to compute a 32-bit RSS hash result.
1903  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1904  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
1905  * RSS output index which is used as the RX queue index where to store the
1906  * received packets.
1907  * The following output is supplied in the RX write-back descriptor:
1908  *     - 32-bit result of the Microsoft RSS hash function,
1909  *     - 4-bit RSS type field.
1910  */
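/*
 * Illustrative sketch, not part of the driver: how the 32-bit RSS hash
 * described above selects an RX queue, assuming a software copy of the
 * 128-entry redirection table holding one queue index per entry.
 */
static __rte_unused uint8_t
igb_rss_hash_to_queue(uint32_t rss_hash, const uint8_t reta[128])
{
        /* The seven LSBs of the hash index the redirection table. */
        return reta[rss_hash & 0x7F];
}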
1911
1912 /*
1913  * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1914  * Used as the default key.
1915  */
1916 static uint8_t rss_intel_key[40] = {
1917         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1918         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1919         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1920         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1921         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1922 };
1923
1924 static void
1925 igb_rss_disable(struct rte_eth_dev *dev)
1926 {
1927         struct e1000_hw *hw;
1928         uint32_t mrqc;
1929
1930         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1931         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1932         mrqc &= ~E1000_MRQC_ENABLE_MASK;
1933         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1934 }
1935
1936 static void
1937 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1938 {
1939         uint8_t  *hash_key;
1940         uint32_t rss_key;
1941         uint32_t mrqc;
1942         uint64_t rss_hf;
1943         uint16_t i;
1944
1945         hash_key = rss_conf->rss_key;
1946         if (hash_key != NULL) {
1947                 /* Fill in RSS hash key */
1948                 for (i = 0; i < 10; i++) {
1949                         rss_key  = hash_key[(i * 4)];
1950                         rss_key |= hash_key[(i * 4) + 1] << 8;
1951                         rss_key |= hash_key[(i * 4) + 2] << 16;
1952                         rss_key |= hash_key[(i * 4) + 3] << 24;
1953                         E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1954                 }
1955         }
1956
1957         /* Set configured hashing protocols in MRQC register */
1958         rss_hf = rss_conf->rss_hf;
1959         mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1960         if (rss_hf & ETH_RSS_IPV4)
1961                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1962         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1963                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1964         if (rss_hf & ETH_RSS_IPV6)
1965                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1966         if (rss_hf & ETH_RSS_IPV6_EX)
1967                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1968         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1969                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1970         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1971                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1972         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1973                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1974         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1975                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1976         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1977                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1978         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1979 }
1980
1981 int
1982 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1983                         struct rte_eth_rss_conf *rss_conf)
1984 {
1985         struct e1000_hw *hw;
1986         uint32_t mrqc;
1987         uint64_t rss_hf;
1988
1989         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1990
1991         /*
1992          * Before changing anything, first check that the update RSS operation
1993          * does not attempt to disable RSS, if RSS was enabled at
1994          * initialization time, or does not attempt to enable RSS, if RSS was
1995          * disabled at initialization time.
1996          */
1997         rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1998         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1999         if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
2000                 if (rss_hf != 0) /* Enable RSS */
2001                         return -(EINVAL);
2002                 return 0; /* Nothing to do */
2003         }
2004         /* RSS enabled */
2005         if (rss_hf == 0) /* Disable RSS */
2006                 return -(EINVAL);
2007         igb_hw_rss_hash_set(hw, rss_conf);
2008         return 0;
2009 }
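/*
 * Illustrative usage sketch, not part of the driver: enabling IPv4/TCP RSS
 * at runtime through the generic ethdev API, which lands in
 * eth_igb_rss_hash_update() above. Keeping rss_key NULL preserves the
 * currently programmed key.
 */
static __rte_unused int
example_update_rss(uint16_t port_id)
{
        struct rte_eth_rss_conf conf = {
                .rss_key = NULL,
                .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
        };

        return rte_eth_dev_rss_hash_update(port_id, &conf);
}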
2010
2011 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
2012                               struct rte_eth_rss_conf *rss_conf)
2013 {
2014         struct e1000_hw *hw;
2015         uint8_t *hash_key;
2016         uint32_t rss_key;
2017         uint32_t mrqc;
2018         uint64_t rss_hf;
2019         uint16_t i;
2020
2021         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2022         hash_key = rss_conf->rss_key;
2023         if (hash_key != NULL) {
2024                 /* Return RSS hash key */
2025                 for (i = 0; i < 10; i++) {
2026                         rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2027                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2028                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2029                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2030                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2031                 }
2032         }
2033
2034         /* Get RSS functions configured in MRQC register */
2035         mrqc = E1000_READ_REG(hw, E1000_MRQC);
2036         if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2037                 rss_conf->rss_hf = 0;
2038                 return 0;
2039         }
2040         rss_hf = 0;
2041         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2042                 rss_hf |= ETH_RSS_IPV4;
2043         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2044                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2045         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2046                 rss_hf |= ETH_RSS_IPV6;
2047         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2048                 rss_hf |= ETH_RSS_IPV6_EX;
2049         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2050                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2051         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2052                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2053         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2054                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2055         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2056                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2057         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2058                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2059         rss_conf->rss_hf = rss_hf;
2060         return 0;
2061 }
2062
2063 static void
2064 igb_rss_configure(struct rte_eth_dev *dev)
2065 {
2066         struct rte_eth_rss_conf rss_conf;
2067         struct e1000_hw *hw;
2068         uint32_t shift;
2069         uint16_t i;
2070
2071         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2072
2073         /* Fill in redirection table. */
2074         shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2075         for (i = 0; i < 128; i++) {
2076                 union e1000_reta {
2077                         uint32_t dword;
2078                         uint8_t  bytes[4];
2079                 } reta;
2080                 uint8_t q_idx;
2081
2082                 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2083                                    i % dev->data->nb_rx_queues : 0);
2084                 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2085                 if ((i & 3) == 3)
2086                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2087         }
2088
2089         /*
2090          * Configure the RSS key and the RSS protocols used to compute
2091          * the RSS hash of input packets.
2092          */
2093         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2094         if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2095                 igb_rss_disable(dev);
2096                 return;
2097         }
2098         if (rss_conf.rss_key == NULL)
2099                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2100         igb_hw_rss_hash_set(hw, &rss_conf);
2101 }
2102
2103 /*
2104  * Check whether the MAC type supports VMDq.
2105  * Return 1 if it does, otherwise return 0.
2106  */
2107 static int
2108 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2109 {
2110         const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2111
2112         switch (hw->mac.type) {
2113         case e1000_82576:
2114         case e1000_82580:
2115         case e1000_i350:
2116                 return 1;
2117         case e1000_82540:
2118         case e1000_82541:
2119         case e1000_82542:
2120         case e1000_82543:
2121         case e1000_82544:
2122         case e1000_82545:
2123         case e1000_82546:
2124         case e1000_82547:
2125         case e1000_82571:
2126         case e1000_82572:
2127         case e1000_82573:
2128         case e1000_82574:
2129         case e1000_82583:
2130         case e1000_i210:
2131         case e1000_i211:
2132         default:
2133                 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2134                 return 0;
2135         }
2136 }
2137
2138 static int
2139 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2140 {
2141         struct rte_eth_vmdq_rx_conf *cfg;
2142         struct e1000_hw *hw;
2143         uint32_t mrqc, vt_ctl, vmolr, rctl;
2144         int i;
2145
2146         PMD_INIT_FUNC_TRACE();
2147
2148         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2149         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2150
2151         /* Check if the MAC type can support VMDq; a return value of 0 means it is not supported */
2152         if (igb_is_vmdq_supported(dev) == 0)
2153                 return -1;
2154
2155         igb_rss_disable(dev);
2156
2157         /* RCTL: enable VLAN filter */
2158         rctl = E1000_READ_REG(hw, E1000_RCTL);
2159         rctl |= E1000_RCTL_VFE;
2160         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2161
2162         /* MRQC: enable vmdq */
2163         mrqc = E1000_READ_REG(hw, E1000_MRQC);
2164         mrqc |= E1000_MRQC_ENABLE_VMDQ;
2165         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2166
2167         /* VTCTL:  pool selection according to VLAN tag */
2168         vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2169         if (cfg->enable_default_pool)
2170                 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2171         vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2172         E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2173
2174         for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2175                 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2176                 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2177                         E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2178                         E1000_VMOLR_MPME);
2179
2180                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2181                         vmolr |= E1000_VMOLR_AUPE;
2182                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2183                         vmolr |= E1000_VMOLR_ROMPE;
2184                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2185                         vmolr |= E1000_VMOLR_ROPE;
2186                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2187                         vmolr |= E1000_VMOLR_BAM;
2188                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2189                         vmolr |= E1000_VMOLR_MPME;
2190
2191                 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2192         }
2193
2194         /*
2195          * VMOLR: set STRVLAN as 1 if IGMAC in VTCTL is set as 1
2196          * Both 82576 and 82580 support it
2197          */
2198         if (hw->mac.type != e1000_i350) {
2199                 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2200                         vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2201                         vmolr |= E1000_VMOLR_STRVLAN;
2202                         E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2203                 }
2204         }
2205
2206         /* VFTA - enable all vlan filters */
2207         for (i = 0; i < IGB_VFTA_SIZE; i++)
2208                 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2209
2210         /* VFRE: enable 8 pools for RX; both 82576 and i350 support it */
2211         if (hw->mac.type != e1000_82580)
2212                 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2213
2214         /*
2215          * RAH/RAL - allow pools to read specific mac addresses
2216          * In this case, all pools should be able to read from mac addr 0
2217          */
2218         E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2219         E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2220
2221         /* VLVF: set up filters for vlan tags as configured */
2222         for (i = 0; i < cfg->nb_pool_maps; i++) {
2223                 /* set vlan id in VF register and set the valid bit */
2224                 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2225                         (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2226                         ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2227                         E1000_VLVF_POOLSEL_MASK)));
2228         }
2229
2230         E1000_WRITE_FLUSH(hw);
2231
2232         return 0;
2233 }
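/*
 * Illustrative usage sketch, not part of the driver: the kind of device
 * configuration an application would pass to rte_eth_dev_configure() so
 * that igb_vmdq_rx_hw_configure() above is exercised. The VLAN ID, pool
 * mapping and accept modes are assumptions for the example.
 */
static __rte_unused void
example_fill_vmdq_conf(struct rte_eth_conf *dev_conf)
{
        struct rte_eth_vmdq_rx_conf *cfg =
                &dev_conf->rx_adv_conf.vmdq_rx_conf;

        dev_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
        cfg->nb_queue_pools = ETH_8_POOLS;
        cfg->enable_default_pool = 0;
        cfg->nb_pool_maps = 1;
        cfg->pool_map[0].vlan_id = 100;     /* steer VLAN 100 ... */
        cfg->pool_map[0].pools = 1ULL << 0; /* ... to pool 0 */
        cfg->rx_mode = ETH_VMDQ_ACCEPT_UNTAG | ETH_VMDQ_ACCEPT_BROADCAST;
}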
2234
2235
2236 /*********************************************************************
2237  *
2238  *  Enable receive unit.
2239  *
2240  **********************************************************************/
2241
2242 static int
2243 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2244 {
2245         struct igb_rx_entry *rxe = rxq->sw_ring;
2246         uint64_t dma_addr;
2247         unsigned i;
2248
2249         /* Initialize software ring entries. */
2250         for (i = 0; i < rxq->nb_rx_desc; i++) {
2251                 volatile union e1000_adv_rx_desc *rxd;
2252                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2253
2254                 if (mbuf == NULL) {
2255                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2256                                      "queue_id=%hu", rxq->queue_id);
2257                         return -ENOMEM;
2258                 }
2259                 dma_addr =
2260                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2261                 rxd = &rxq->rx_ring[i];
2262                 rxd->read.hdr_addr = 0;
2263                 rxd->read.pkt_addr = dma_addr;
2264                 rxe[i].mbuf = mbuf;
2265         }
2266
2267         return 0;
2268 }
2269
2270 #define E1000_MRQC_DEF_Q_SHIFT               (3)
2271 static int
2272 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2273 {
2274         struct e1000_hw *hw =
2275                 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2276         uint32_t mrqc;
2277
2278         if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2279                 /*
2280                  * SRIOV active scheme
2281                  * FIXME if support RSS together with VMDq & SRIOV
2282                  */
2283                 mrqc = E1000_MRQC_ENABLE_VMDQ;
2284                 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2285                 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2286                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2287         } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2288                 /*
2289                  * SRIOV inactive scheme
2290                  */
2291                 switch (dev->data->dev_conf.rxmode.mq_mode) {
2292                         case ETH_MQ_RX_RSS:
2293                                 igb_rss_configure(dev);
2294                                 break;
2295                         case ETH_MQ_RX_VMDQ_ONLY:
2296                                 /* Configure general VMDq-only RX parameters */
2297                                 igb_vmdq_rx_hw_configure(dev);
2298                                 break;
2299                         case ETH_MQ_RX_NONE:
2300                                 /* if mq_mode is none, disable RSS mode. */
2301                         default:
2302                                 igb_rss_disable(dev);
2303                                 break;
2304                 }
2305         }
2306
2307         return 0;
2308 }
2309
2310 int
2311 eth_igb_rx_init(struct rte_eth_dev *dev)
2312 {
2313         struct rte_eth_rxmode *rxmode;
2314         struct e1000_hw     *hw;
2315         struct igb_rx_queue *rxq;
2316         uint32_t rctl;
2317         uint32_t rxcsum;
2318         uint32_t srrctl;
2319         uint16_t buf_size;
2320         uint16_t rctl_bsize;
2321         uint16_t i;
2322         int ret;
2323
2324         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2325         srrctl = 0;
2326
2327         /*
2328          * Make sure receives are disabled while setting
2329          * up the descriptor ring.
2330          */
2331         rctl = E1000_READ_REG(hw, E1000_RCTL);
2332         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2333
2334         rxmode = &dev->data->dev_conf.rxmode;
2335
2336         /*
2337          * Configure support of jumbo frames, if any.
2338          */
2339         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
2340                 rctl |= E1000_RCTL_LPE;
2341
2342                 /*
2343                  * Set the maximum packet length by default; it may be updated
2344                  * later when dual VLAN is enabled or disabled.
2345                  */
2346                 E1000_WRITE_REG(hw, E1000_RLPML,
2347                         dev->data->dev_conf.rxmode.max_rx_pkt_len +
2348                                                 VLAN_TAG_SIZE);
2349         } else
2350                 rctl &= ~E1000_RCTL_LPE;
2351
2352         /* Configure and enable each RX queue. */
2353         rctl_bsize = 0;
2354         dev->rx_pkt_burst = eth_igb_recv_pkts;
2355         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2356                 uint64_t bus_addr;
2357                 uint32_t rxdctl;
2358
2359                 rxq = dev->data->rx_queues[i];
2360
2361                 rxq->flags = 0;
2362                 /*
2363                  * i350 and i354 vlan packets have vlan tags byte swapped.
2364                  */
2365                 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2366                         rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2367                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2368                 } else {
2369                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2370                 }
2371
2372                 /* Allocate buffers for descriptor rings and set up queue */
2373                 ret = igb_alloc_rx_queue_mbufs(rxq);
2374                 if (ret)
2375                         return ret;
2376
2377                 /*
2378                  * Reset crc_len in case it was changed after queue setup by a
2379                  *  call to configure
2380                  */
2381                 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2382                         rxq->crc_len = RTE_ETHER_CRC_LEN;
2383                 else
2384                         rxq->crc_len = 0;
2385
2386                 bus_addr = rxq->rx_ring_phys_addr;
2387                 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2388                                 rxq->nb_rx_desc *
2389                                 sizeof(union e1000_adv_rx_desc));
2390                 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2391                                 (uint32_t)(bus_addr >> 32));
2392                 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2393
2394                 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2395
2396                 /*
2397                  * Configure RX buffer size.
2398                  */
2399                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2400                         RTE_PKTMBUF_HEADROOM);
2401                 if (buf_size >= 1024) {
2402                         /*
2403                          * Configure the BSIZEPACKET field of the SRRCTL
2404                          * register of the queue.
2405                          * Value is in 1 KB resolution, from 1 KB to 127 KB.
2406                          * If this field is equal to 0b, then RCTL.BSIZE
2407                          * determines the RX packet buffer size (a helper sketch follows this function).
2408                          */
2409                         srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2410                                    E1000_SRRCTL_BSIZEPKT_MASK);
2411                         buf_size = (uint16_t) ((srrctl &
2412                                                 E1000_SRRCTL_BSIZEPKT_MASK) <<
2413                                                E1000_SRRCTL_BSIZEPKT_SHIFT);
2414
2415                         /* Add the dual VLAN tag length to support dual VLAN */
2416                         if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2417                                                 2 * VLAN_TAG_SIZE) > buf_size){
2418                                 if (!dev->data->scattered_rx)
2419                                         PMD_INIT_LOG(DEBUG,
2420                                                      "forcing scatter mode");
2421                                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2422                                 dev->data->scattered_rx = 1;
2423                         }
2424                 } else {
2425                         /*
2426                          * Use BSIZE field of the device RCTL register.
2427                          */
2428                         if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2429                                 rctl_bsize = buf_size;
2430                         if (!dev->data->scattered_rx)
2431                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2432                         dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2433                         dev->data->scattered_rx = 1;
2434                 }
2435
2436                 /* Set if packets are dropped when no descriptors are available */
2437                 if (rxq->drop_en)
2438                         srrctl |= E1000_SRRCTL_DROP_EN;
2439
2440                 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2441
2442                 /* Enable this RX queue. */
2443                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2444                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2445                 rxdctl &= 0xFFF00000;
2446                 rxdctl |= (rxq->pthresh & 0x1F);
2447                 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2448                 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2449                 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2450         }
2451
2452         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2453                 if (!dev->data->scattered_rx)
2454                         PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2455                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2456                 dev->data->scattered_rx = 1;
2457         }
2458
2459         /*
2460          * Setup BSIZE field of RCTL register, if needed.
2461          * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
2462          * register, since the code above configures the SRRCTL register of
2463          * the RX queue in such a case.
2464          * All configurable sizes are:
2465          * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2466          *  8192: rctl |= (E1000_RCTL_SZ_8192  | E1000_RCTL_BSEX);
2467          *  4096: rctl |= (E1000_RCTL_SZ_4096  | E1000_RCTL_BSEX);
2468          *  2048: rctl |= E1000_RCTL_SZ_2048;
2469          *  1024: rctl |= E1000_RCTL_SZ_1024;
2470          *   512: rctl |= E1000_RCTL_SZ_512;
2471          *   256: rctl |= E1000_RCTL_SZ_256;
2472          */
2473         if (rctl_bsize > 0) {
2474                 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2475                         rctl |= E1000_RCTL_SZ_512;
2476                 else /* 256 <= buf_size < 512 - use 256 */
2477                         rctl |= E1000_RCTL_SZ_256;
2478         }
2479
2480         /*
2481          * Configure RSS if device configured with multiple RX queues.
2482          */
2483         igb_dev_mq_rx_configure(dev);
2484
2485         /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2486         rctl |= E1000_READ_REG(hw, E1000_RCTL);
2487
2488         /*
2489          * Setup the Checksum Register.
2490          * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2491          */
2492         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2493         rxcsum |= E1000_RXCSUM_PCSD;
2494
2495         /* Enable both L3/L4 rx checksum offload */
2496         if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
2497                 rxcsum |= E1000_RXCSUM_IPOFL;
2498         else
2499                 rxcsum &= ~E1000_RXCSUM_IPOFL;
2500         if (rxmode->offloads &
2501                 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM))
2502                 rxcsum |= E1000_RXCSUM_TUOFL;
2503         else
2504                 rxcsum &= ~E1000_RXCSUM_TUOFL;
2505         if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
2506                 rxcsum |= E1000_RXCSUM_CRCOFL;
2507         else
2508                 rxcsum &= ~E1000_RXCSUM_CRCOFL;
2509
2510         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2511
2512         /* Setup the Receive Control Register. */
2513         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
2514                 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2515
2516                 /* clear STRCRC bit in all queues */
2517                 if (hw->mac.type == e1000_i350 ||
2518                     hw->mac.type == e1000_i210 ||
2519                     hw->mac.type == e1000_i211 ||
2520                     hw->mac.type == e1000_i354) {
2521                         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2522                                 rxq = dev->data->rx_queues[i];
2523                                 uint32_t dvmolr = E1000_READ_REG(hw,
2524                                         E1000_DVMOLR(rxq->reg_idx));
2525                                 dvmolr &= ~E1000_DVMOLR_STRCRC;
2526                                 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2527                         }
2528                 }
2529         } else {
2530                 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2531
2532                 /* set STRCRC bit in all queues */
2533                 if (hw->mac.type == e1000_i350 ||
2534                     hw->mac.type == e1000_i210 ||
2535                     hw->mac.type == e1000_i211 ||
2536                     hw->mac.type == e1000_i354) {
2537                         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2538                                 rxq = dev->data->rx_queues[i];
2539                                 uint32_t dvmolr = E1000_READ_REG(hw,
2540                                         E1000_DVMOLR(rxq->reg_idx));
2541                                 dvmolr |= E1000_DVMOLR_STRCRC;
2542                                 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2543                         }
2544                 }
2545         }
2546
2547         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2548         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2549                 E1000_RCTL_RDMTS_HALF |
2550                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2551
2552         /* Make sure VLAN Filters are off. */
2553         if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2554                 rctl &= ~E1000_RCTL_VFE;
2555         /* Don't store bad packets. */
2556         rctl &= ~E1000_RCTL_SBP;
2557
2558         /* Enable Receives. */
2559         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2560
2561         /*
2562          * Setup the HW Rx Head and Tail Descriptor Pointers.
2563          * This needs to be done after enable.
2564          */
2565         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2566                 rxq = dev->data->rx_queues[i];
2567                 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2568                 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2569         }
2570
2571         return 0;
2572 }
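/*
 * Illustrative sketch, not part of the driver: igb_srrctl_bsize_field() is a
 * hypothetical helper showing how eth_igb_rx_init() above derives the
 * SRRCTL.BSIZEPACKET field (1 KB granularity) from the mbuf data room size.
 */
static __rte_unused uint32_t
igb_srrctl_bsize_field(uint16_t data_room_size)
{
        uint16_t buf_size = (uint16_t)(data_room_size - RTE_PKTMBUF_HEADROOM);

        return (buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
                E1000_SRRCTL_BSIZEPKT_MASK;
}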
2573
2574 /*********************************************************************
2575  *
2576  *  Enable transmit unit.
2577  *
2578  **********************************************************************/
2579 void
2580 eth_igb_tx_init(struct rte_eth_dev *dev)
2581 {
2582         struct e1000_hw     *hw;
2583         struct igb_tx_queue *txq;
2584         uint32_t tctl;
2585         uint32_t txdctl;
2586         uint16_t i;
2587
2588         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2589
2590         /* Setup the Base and Length of the Tx Descriptor Rings. */
2591         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2592                 uint64_t bus_addr;
2593                 txq = dev->data->tx_queues[i];
2594                 bus_addr = txq->tx_ring_phys_addr;
2595
2596                 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2597                                 txq->nb_tx_desc *
2598                                 sizeof(union e1000_adv_tx_desc));
2599                 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2600                                 (uint32_t)(bus_addr >> 32));
2601                 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2602
2603                 /* Setup the HW Tx Head and Tail descriptor pointers. */
2604                 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2605                 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2606
2607                 /* Setup Transmit threshold registers. */
2608                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2609                 txdctl |= txq->pthresh & 0x1F;
2610                 txdctl |= ((txq->hthresh & 0x1F) << 8);
2611                 txdctl |= ((txq->wthresh & 0x1F) << 16);
2612                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2613                 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2614         }
2615
2616         /* Program the Transmit Control Register. */
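        /*
         * TCTL: pad short packets (PSP), retransmit on late collision
         * (RTLC), default collision threshold and transmitter enable (EN).
         */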
2617         tctl = E1000_READ_REG(hw, E1000_TCTL);
2618         tctl &= ~E1000_TCTL_CT;
2619         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2620                  (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2621
2622         e1000_config_collision_dist(hw);
2623
2624         /* This write will effectively turn on the transmit unit. */
2625         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2626 }
2627
2628 /*********************************************************************
2629  *
2630  *  Enable VF receive unit.
2631  *
2632  **********************************************************************/
2633 int
2634 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2635 {
2636         struct e1000_hw     *hw;
2637         struct igb_rx_queue *rxq;
2638         uint32_t srrctl;
2639         uint16_t buf_size;
2640         uint16_t rctl_bsize;
2641         uint16_t i;
2642         int ret;
2643
2644         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2645
2646         /* setup MTU */
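        /*
         * The VF cannot program the long-packet maximum length directly;
         * e1000_rlpml_set_vf() forwards the request to the PF mailbox.
         */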
2647         e1000_rlpml_set_vf(hw,
2648                 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2649                 VLAN_TAG_SIZE));
2650
2651         /* Configure and enable each RX queue. */
2652         rctl_bsize = 0;
2653         dev->rx_pkt_burst = eth_igb_recv_pkts;
2654         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2655                 uint64_t bus_addr;
2656                 uint32_t rxdctl;
2657
2658                 rxq = dev->data->rx_queues[i];
2659
2660                 rxq->flags = 0;
2661                 /*
2662                  * VLAN tags of i350 VF loopback packets arrive byte-swapped.
2663                  */
2664                 if (hw->mac.type == e1000_vfadapt_i350) {
2665                         rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2666                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2667                 } else {
2668                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2669                 }
2670
2671                 /* Allocate buffers for descriptor rings and set up queue */
2672                 ret = igb_alloc_rx_queue_mbufs(rxq);
2673                 if (ret)
2674                         return ret;
2675
2676                 bus_addr = rxq->rx_ring_phys_addr;
2677                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2678                                 rxq->nb_rx_desc *
2679                                 sizeof(union e1000_adv_rx_desc));
2680                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2681                                 (uint32_t)(bus_addr >> 32));
2682                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2683
2684                 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2685
2686                 /*
2687                  * Configure RX buffer size.
2688                  */
2689                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2690                         RTE_PKTMBUF_HEADROOM);
2691                 if (buf_size >= 1024) {
2692                         /*
2693                          * Configure the BSIZEPACKET field of the SRRCTL
2694                          * register of the queue.
2695                          * Value is in 1 KB resolution, from 1 KB to 127 KB.
2696                          * If this field is equal to 0b, then RCTL.BSIZE
2697                          * determines the RX packet buffer size.
2698                          */
2699                         srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2700                                    E1000_SRRCTL_BSIZEPKT_MASK);
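                        /*
                         * Recompute the buffer size the hardware will
                         * actually use (rounded down to 1 KB granularity).
                         */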
2701                         buf_size = (uint16_t) ((srrctl &
2702                                                 E1000_SRRCTL_BSIZEPKT_MASK) <<
2703                                                E1000_SRRCTL_BSIZEPKT_SHIFT);
2704
2705                         /* Leave room for two VLAN tags (QinQ) when checking the fit */
2706                         if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2707                                                 2 * VLAN_TAG_SIZE) > buf_size){
2708                                 if (!dev->data->scattered_rx)
2709                                         PMD_INIT_LOG(DEBUG,
2710                                                      "forcing scatter mode");
2711                                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2712                                 dev->data->scattered_rx = 1;
2713                         }
2714                 } else {
2715                         /*
2716                          * Use BSIZE field of the device RCTL register.
2717                          */
2718                         if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2719                                 rctl_bsize = buf_size;
2720                         if (!dev->data->scattered_rx)
2721                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2722                         dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2723                         dev->data->scattered_rx = 1;
2724                 }
2725
2726                 /* Set if packets are dropped when no descriptors available */
2727                 if (rxq->drop_en)
2728                         srrctl |= E1000_SRRCTL_DROP_EN;
2729
2730                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2731
2732                 /* Enable this RX queue. */
2733                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2734                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
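                /* Clear the old threshold fields; the enable bit is preserved. */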
2735                 rxdctl &= 0xFFF00000;
2736                 rxdctl |= (rxq->pthresh & 0x1F);
2737                 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2738                 if (hw->mac.type == e1000_vfadapt) {
2739                         /*
2740                          * 82576 VF erratum workaround:
2741                          * force WTHRESH to 1 so that descriptor
2742                          * write-back is always triggered.
2743                          */
2744                         rxdctl |= 0x10000;
2745                         PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1!");
2746                 } else {
2747                         rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2748                 }
2749                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2750         }
2751
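        /* Scatter RX can also be requested explicitly through the offload flag. */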
2752         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2753                 if (!dev->data->scattered_rx)
2754                         PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2755                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2756                 dev->data->scattered_rx = 1;
2757         }
2758
2759         /*
2760          * Setup the HW Rx Head and Tail Descriptor Pointers.
2761          * This needs to be done after enable.
2762          */
2763         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2764                 rxq = dev->data->rx_queues[i];
2765                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2766                 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2767         }
2768
2769         return 0;
2770 }
2771
2772 /*********************************************************************
2773  *
2774  *  Enable VF transmit unit.
2775  *
2776  **********************************************************************/
2777 void
2778 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2779 {
2780         struct e1000_hw     *hw;
2781         struct igb_tx_queue *txq;
2782         uint32_t txdctl;
2783         uint16_t i;
2784
2785         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2786
2787         /* Setup the Base and Length of the Tx Descriptor Rings. */
2788         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2789                 uint64_t bus_addr;
2790
2791                 txq = dev->data->tx_queues[i];
2792                 bus_addr = txq->tx_ring_phys_addr;
2793                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2794                                 txq->nb_tx_desc *
2795                                 sizeof(union e1000_adv_tx_desc));
2796                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2797                                 (uint32_t)(bus_addr >> 32));
2798                 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2799
2800                 /* Setup the HW Tx Head and Tail descriptor pointers. */
2801                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2802                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2803
2804                 /* Setup Transmit threshold registers. */
2805                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2806                 txdctl |= txq->pthresh & 0x1F;
2807                 txdctl |= ((txq->hthresh & 0x1F) << 8);
2808                 if (hw->mac.type == e1000_82576) {
2809                         /*
2810                          * 82576 VF erratum workaround:
2811                          * force WTHRESH to 1 so that descriptor
2812                          * write-back is always triggered.
2813                          */
2814                         txdctl |= 0x10000;
2815                         PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1!");
2816                 } else {
2817                         txdctl |= ((txq->wthresh & 0x1F) << 16);
2818                 }
2819                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2820                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2821         }
2823 }
2824
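/*
 * RX queue info callback (backs rte_eth_rx_queue_info_get()):
 * report the queue's current configuration.
 */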
2825 void
2826 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2827         struct rte_eth_rxq_info *qinfo)
2828 {
2829         struct igb_rx_queue *rxq;
2830
2831         rxq = dev->data->rx_queues[queue_id];
2832
2833         qinfo->mp = rxq->mb_pool;
2834         qinfo->scattered_rx = dev->data->scattered_rx;
2835         qinfo->nb_desc = rxq->nb_rx_desc;
2836
2837         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2838         qinfo->conf.rx_drop_en = rxq->drop_en;
2839         qinfo->conf.offloads = rxq->offloads;
2840 }
2841
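/* TX counterpart of igb_rxq_info_get() (backs rte_eth_tx_queue_info_get()). */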
2842 void
2843 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2844         struct rte_eth_txq_info *qinfo)
2845 {
2846         struct igb_tx_queue *txq;
2847
2848         txq = dev->data->tx_queues[queue_id];
2849
2850         qinfo->nb_desc = txq->nb_tx_desc;
2851
2852         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2853         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2854         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2855         qinfo->conf.offloads = txq->offloads;
2856 }
2857
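/*
 * Validate an rte_flow RSS action against the device limits and copy it,
 * including the hash key and queue list, into the driver-private storage.
 */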
2858 int
2859 igb_rss_conf_init(struct rte_eth_dev *dev,
2860                   struct igb_rte_flow_rss_conf *out,
2861                   const struct rte_flow_action_rss *in)
2862 {
2863         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2864
2865         if (in->key_len > RTE_DIM(out->key) ||
2866             ((hw->mac.type == e1000_82576) &&
2867              (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
2868             ((hw->mac.type != e1000_82576) &&
2869              (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
2870                 return -EINVAL;
2871         out->conf = (struct rte_flow_action_rss){
2872                 .func = in->func,
2873                 .level = in->level,
2874                 .types = in->types,
2875                 .key_len = in->key_len,
2876                 .queue_num = in->queue_num,
2877                 .key = memcpy(out->key, in->key, in->key_len),
2878                 .queue = memcpy(out->queue, in->queue,
2879                                 sizeof(*in->queue) * in->queue_num),
2880         };
2881         return 0;
2882 }
2883
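/* Return non-zero when the two RSS action configurations are identical. */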
2884 int
2885 igb_action_rss_same(const struct rte_flow_action_rss *comp,
2886                     const struct rte_flow_action_rss *with)
2887 {
2888         return (comp->func == with->func &&
2889                 comp->level == with->level &&
2890                 comp->types == with->types &&
2891                 comp->key_len == with->key_len &&
2892                 comp->queue_num == with->queue_num &&
2893                 !memcmp(comp->key, with->key, with->key_len) &&
2894                 !memcmp(comp->queue, with->queue,
2895                         sizeof(*with->queue) * with->queue_num));
2896 }
2897
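/*
 * Apply (add) or remove an rte_flow RSS filter: program the redirection
 * table and hash key, or disable RSS when the stored configuration matches
 * the one being removed.  Only one RSS flow rule can be active at a time.
 */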
2898 int
2899 igb_config_rss_filter(struct rte_eth_dev *dev,
2900                 struct igb_rte_flow_rss_conf *conf, bool add)
2901 {
2902         uint32_t shift;
2903         uint16_t i, j;
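        /* Translate the flow API RSS action into an ethdev rss_conf. */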
2904         struct rte_eth_rss_conf rss_conf = {
2905                 .rss_key = conf->conf.key_len ?
2906                         (void *)(uintptr_t)conf->conf.key : NULL,
2907                 .rss_key_len = conf->conf.key_len,
2908                 .rss_hf = conf->conf.types,
2909         };
2910         struct e1000_filter_info *filter_info =
2911                 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2912         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2913
2916         if (!add) {
2917                 if (igb_action_rss_same(&filter_info->rss_info.conf,
2918                                         &conf->conf)) {
2919                         igb_rss_disable(dev);
2920                         memset(&filter_info->rss_info, 0,
2921                                 sizeof(struct igb_rte_flow_rss_conf));
2922                         return 0;
2923                 }
2924                 return -EINVAL;
2925         }
2926
2927         if (filter_info->rss_info.conf.queue_num)
2928                 return -EINVAL;
2929
2930         /* Fill in redirection table. */
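        /*
         * The 128-entry table is written four one-byte entries at a time and
         * the action's queues are assigned round-robin; the 82575 expects the
         * queue index in the upper bits of each entry.
         */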
2931         shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2932         for (i = 0, j = 0; i < 128; i++, j++) {
2933                 union e1000_reta {
2934                         uint32_t dword;
2935                         uint8_t  bytes[4];
2936                 } reta;
2937                 uint8_t q_idx;
2938
2939                 if (j == conf->conf.queue_num)
2940                         j = 0;
2941                 q_idx = conf->conf.queue[j];
2942                 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2943                 if ((i & 3) == 3)
2944                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2945         }
2946
2947         /* Configure the RSS key and the RSS protocols used to compute
2948          * the RSS hash of input packets.
2949          */
2950         if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2951                 igb_rss_disable(dev);
2952                 return 0;
2953         }
2954         if (rss_conf.rss_key == NULL)
2955                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2956         igb_hw_rss_hash_set(hw, &rss_conf);
2957
2958         if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))
2959                 return -EINVAL;
2960
2961         return 0;
2962 }