add prefix to cache line macros
[dpdk.git] / lib / librte_pmd_i40e / i40e_rxtx.c
index 2b53677..2d2ef04 100644 (file)
@@ -105,6 +105,10 @@ i40e_rxd_status_to_pkt_flags(uint64_t qword)
                                        I40E_RX_DESC_FLTSTAT_RSS_HASH) ==
                        I40E_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+       /* Check if FDIR Match */
+       flags |= (qword & (1 << I40E_RX_DESC_STATUS_FLM_SHIFT) ?
+                                                       PKT_RX_FDIR : 0);
+
        return flags;
 }
 
@@ -208,34 +212,34 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
                PKT_RX_IPV4_HDR_EXT, /* PTYPE 56 */
                PKT_RX_IPV4_HDR_EXT, /* PTYPE 57 */
                PKT_RX_IPV4_HDR_EXT, /* PTYPE 58 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 59 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 60 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 61 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 59 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 60 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 61 */
                0, /* PTYPE 62 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 63 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 64 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 65 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 66 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 67 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 68 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 63 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 64 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 65 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 66 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 67 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 68 */
                0, /* PTYPE 69 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 70 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 71 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 72 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 73 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 74 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 75 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 76 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 70 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 71 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 72 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 73 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 74 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 75 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 76 */
                0, /* PTYPE 77 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 78 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 79 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 80 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 81 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 82 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 83 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 78 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 79 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 80 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 81 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 82 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 83 */
                0, /* PTYPE 84 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 85 */
-               PKT_RX_IPV4_HDR_EXT, /* PTYPE 86 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 85 */
+               PKT_RX_TUNNEL_IPV4_HDR, /* PTYPE 86 */
                PKT_RX_IPV4_HDR_EXT, /* PTYPE 87 */
                PKT_RX_IPV6_HDR, /* PTYPE 88 */
                PKT_RX_IPV6_HDR, /* PTYPE 89 */
@@ -274,34 +278,34 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
                PKT_RX_IPV6_HDR_EXT, /* PTYPE 122 */
                PKT_RX_IPV6_HDR_EXT, /* PTYPE 123 */
                PKT_RX_IPV6_HDR_EXT, /* PTYPE 124 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 125 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 126 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 127 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 125 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 126 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 127 */
                0, /* PTYPE 128 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 129 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 130 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 131 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 132 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 133 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 134 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 129 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 130 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 131 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 132 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 133 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 134 */
                0, /* PTYPE 135 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 136 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 137 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 138 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 139 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 140 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 141 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 142 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 136 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 137 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 138 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 139 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 140 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 141 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 142 */
                0, /* PTYPE 143 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 144 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 145 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 146 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 147 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 148 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 149 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 144 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 145 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 146 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 147 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 148 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 149 */
                0, /* PTYPE 150 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 151 */
-               PKT_RX_IPV6_HDR_EXT, /* PTYPE 152 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 151 */
+               PKT_RX_TUNNEL_IPV6_HDR, /* PTYPE 152 */
                PKT_RX_IPV6_HDR_EXT, /* PTYPE 153 */
                0, /* PTYPE 154 */
                0, /* PTYPE 155 */
@@ -410,12 +414,57 @@ i40e_rxd_ptype_to_pkt_flags(uint64_t qword)
        return ip_ptype_map[ptype];
 }
 
+/*
+ * Selector values for the FLEXBH/FLEXBL fields of the RX descriptor
+ * extended status word. Each field is isolated with its 0x03 mask after
+ * shifting; the value then tells which data the matching qword3 dword
+ * holds (a flow director filter ID or flexible payload bytes).
+ */
+#define I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK   0x03
+#define I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID  0x01
+#define I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX   0x02
+#define I40E_RX_DESC_EXT_STATUS_FLEXBL_MASK   0x03
+#define I40E_RX_DESC_EXT_STATUS_FLEXBL_FLEX   0x01
+
+/*
+ * Extract flow director match data from an RX write-back descriptor.
+ *
+ * With RTE_LIBRTE_I40E_16BYTE_RX_DESC the filter ID is taken from
+ * qword0 and stored in mb->hash.fdir.hi. Otherwise the FLEXBH/FLEXBL
+ * selectors of the extended status decide whether qword3 holds a filter
+ * ID or flexible bytes, which are stored in mb->hash.fdir.hi/lo.
+ *
+ * Returns the PKT_RX_FDIR_ID / PKT_RX_FDIR_FLX flags describing what was
+ * stored; the caller is expected to OR them into the mbuf ol_flags.
+ */
+static inline uint64_t
+i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+       uint64_t flags = 0;
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+       /* Only the non-16-byte layout has an extended status to decode */
+       uint32_t ext_status;
+       uint16_t flexbh, flexbl;
+#endif
+
+#ifdef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+       mb->hash.fdir.hi =
+               rte_le_to_cpu_32(rxdp->wb.qword0.hi_dword.fd);
+       flags |= PKT_RX_FDIR_ID;
+#else
+       /* Read the volatile status once so both selectors share a snapshot */
+       ext_status = rte_le_to_cpu_32(rxdp->wb.qword2.ext_status);
+       flexbh = (ext_status >> I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+               I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+       flexbl = (ext_status >> I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT) &
+               I40E_RX_DESC_EXT_STATUS_FLEXBL_MASK;
+
+       /* High dword: either the filter ID or the upper flexible bytes */
+       if (flexbh == I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+               mb->hash.fdir.hi =
+                       rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+               flags |= PKT_RX_FDIR_ID;
+       } else if (flexbh == I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX) {
+               mb->hash.fdir.hi =
+                       rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.flex_bytes_hi);
+               flags |= PKT_RX_FDIR_FLX;
+       }
+       /* Low dword: lower flexible bytes, when reported */
+       if (flexbl == I40E_RX_DESC_EXT_STATUS_FLEXBL_FLEX) {
+               mb->hash.fdir.lo =
+                       rte_le_to_cpu_32(rxdp->wb.qword3.lo_dword.flex_bytes_lo);
+               flags |= PKT_RX_FDIR_FLX;
+       }
+#endif
+       return flags;
+}
+
 static inline void
-i40e_txd_enable_checksum(uint32_t ol_flags,
+i40e_txd_enable_checksum(uint64_t ol_flags,
                        uint32_t *td_cmd,
                        uint32_t *td_offset,
                        uint8_t l2_len,
-                       uint8_t l3_len)
+                       uint16_t l3_len,
+                       uint8_t inner_l2_len,
+                       uint16_t inner_l3_len,
+                       uint32_t *cd_tunneling)
 {
        if (!l2_len) {
                PMD_DRV_LOG(DEBUG, "L2 length set to 0");
@@ -428,6 +477,27 @@ i40e_txd_enable_checksum(uint32_t ol_flags,
                return;
        }
 
+       /* VXLAN packet TX checksum offload */
+       if (unlikely(ol_flags & PKT_TX_VXLAN_CKSUM)) {
+               uint8_t l4tun_len;
+
+               l4tun_len = ETHER_VXLAN_HLEN + inner_l2_len;
+
+               if (ol_flags & PKT_TX_IPV4_CSUM)
+                       *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
+               else if (ol_flags & PKT_TX_IPV6)
+                       *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+
+               /* Now set the ctx descriptor fields */
+               *cd_tunneling |= (l3_len >> 2) <<
+                               I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
+                               I40E_TXD_CTX_UDP_TUNNELING |
+                               (l4tun_len >> 1) <<
+                               I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+
+               l3_len = inner_l3_len;
+       }
+
        /* Enable L3 checksum offloads */
        if (ol_flags & PKT_TX_IPV4_CSUM) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
@@ -637,10 +707,17 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
                        pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
                        pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
                        pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
-                       mb->ol_flags = pkt_flags;
+
+                       mb->packet_type = (uint16_t)((qword1 &
+                                       I40E_RXD_QW1_PTYPE_MASK) >>
+                                       I40E_RXD_QW1_PTYPE_SHIFT);
                        if (pkt_flags & PKT_RX_RSS_HASH)
                                mb->hash.rss = rte_le_to_cpu_32(\
-                                       rxdp->wb.qword0.hi_dword.rss);
+                                       rxdp[j].wb.qword0.hi_dword.rss);
+                       if (pkt_flags & PKT_RX_FDIR)
+                               pkt_flags |= i40e_rxd_build_fdir(&rxdp[j], mb);
+
+                       mb->ol_flags = pkt_flags;
                }
 
                for (j = 0; j < I40E_LOOK_AHEAD; j++)
@@ -873,10 +950,15 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
                pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
                pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
-               rxm->ol_flags = pkt_flags;
+               rxm->packet_type = (uint16_t)((qword1 & I40E_RXD_QW1_PTYPE_MASK) >>
+                               I40E_RXD_QW1_PTYPE_SHIFT);
                if (pkt_flags & PKT_RX_RSS_HASH)
                        rxm->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+               if (pkt_flags & PKT_RX_FDIR)
+                       pkt_flags |= i40e_rxd_build_fdir(&rxd, rxm);
+
+               rxm->ol_flags = pkt_flags;
 
                rx_pkts[nb_rx++] = rxm;
        }
@@ -1027,10 +1109,16 @@ i40e_recv_scattered_pkts(void *rx_queue,
                pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
                pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
                pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
-               first_seg->ol_flags = pkt_flags;
+               first_seg->packet_type = (uint16_t)((qword1 &
+                                       I40E_RXD_QW1_PTYPE_MASK) >>
+                                       I40E_RXD_QW1_PTYPE_SHIFT);
                if (pkt_flags & PKT_RX_RSS_HASH)
                        rxm->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+               if (pkt_flags & PKT_RX_FDIR)
+                       pkt_flags |= i40e_rxd_build_fdir(&rxd, rxm);
+
+               first_seg->ol_flags = pkt_flags;
 
                /* Prefetch data of first segment, if configured to do so. */
                rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
@@ -1068,7 +1156,10 @@ i40e_recv_scattered_pkts(void *rx_queue,
 static inline uint16_t
 i40e_calc_context_desc(uint64_t flags)
 {
-       uint16_t mask = 0;
+       uint64_t mask = 0ULL;
+
+       if (flags | PKT_TX_VXLAN_CKSUM)
+               mask |= PKT_TX_VXLAN_CKSUM;
 
 #ifdef RTE_LIBRTE_IEEE1588
        mask |= PKT_TX_IEEE1588_TMST;
@@ -1089,6 +1180,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        volatile struct i40e_tx_desc *txr;
        struct rte_mbuf *tx_pkt;
        struct rte_mbuf *m_seg;
+       uint32_t cd_tunneling_params;
        uint16_t tx_id;
        uint16_t nb_tx;
        uint32_t td_cmd;
@@ -1097,7 +1189,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        uint32_t td_tag;
        uint64_t ol_flags;
        uint8_t l2_len;
-       uint8_t l3_len;
+       uint16_t l3_len;
+       uint8_t inner_l2_len;
+       uint16_t inner_l3_len;
        uint16_t nb_used;
        uint16_t nb_ctx;
        uint16_t tx_last;
@@ -1125,7 +1219,9 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
                ol_flags = tx_pkt->ol_flags;
                l2_len = tx_pkt->l2_len;
+               inner_l2_len = tx_pkt->inner_l2_len;
                l3_len = tx_pkt->l3_len;
+               inner_l3_len = tx_pkt->inner_l3_len;
 
                /* Calculate the number of context descriptors needed. */
                nb_ctx = i40e_calc_context_desc(ol_flags);
@@ -1173,15 +1269,17 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                td_cmd |= I40E_TX_DESC_CMD_ICRC;
 
                /* Enable checksum offloading */
+               cd_tunneling_params = 0;
                i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
-                                                       l2_len, l3_len);
+                                               l2_len, l3_len, inner_l2_len,
+                                               inner_l3_len,
+                                               &cd_tunneling_params);
 
                if (unlikely(nb_ctx)) {
                        /* Setup TX context descriptor if required */
                        volatile struct i40e_tx_context_desc *ctx_txd =
                                (volatile struct i40e_tx_context_desc *)\
                                                        &txr[tx_id];
-                       uint32_t cd_tunneling_params = 0;
                        uint16_t cd_l2tag2 = 0;
                        uint64_t cd_type_cmd_tso_mss =
                                I40E_TX_DESC_DTYPE_CONTEXT;
@@ -1443,14 +1541,58 @@ i40e_xmit_pkts_simple(void *tx_queue,
        return nb_tx;
 }
 
+/*
+ * Find the VSI a queue belongs to.
+ *
+ * 'queue_idx' is the index the application uses, which assumes queues are
+ * numbered sequentially. From the driver's perspective the layout differs.
+ * For example, q0 belongs to the FDIR VSI, q1-q64 to the MAIN VSI, q65-q96
+ * to SRIOV VSIs and q97-q128 to VMDQ VSIs. An application running on the
+ * host can use q1-q64 and q97-q128, 96 queues in total, and addresses them
+ * as queue_idx 0 to 95 while the real queues are different. This function
+ * performs that mapping and returns the owning VSI, or NULL when the index
+ * lies beyond the configured VMDQ queues.
+ */
+static struct i40e_vsi*
+i40e_pf_get_vsi_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
+{
+       uint16_t vmdq_qidx;
+
+       /* Indexes below the MAIN VSI queue count map to the MAIN VSI */
+       if (queue_idx < pf->main_vsi->nb_qps)
+               return pf->main_vsi;
+
+       /* Rebase so that 0 designates the first VMDQ queue */
+       vmdq_qidx = queue_idx - pf->main_vsi->nb_qps;
+
+       /* Reject anything past the last configured VMDQ queue */
+       if (vmdq_qidx > pf->nb_cfg_vmdq_vsi * pf->vmdq_nb_qps - 1) {
+               PMD_INIT_LOG(ERR, "queue_idx out of range. VMDQ configured?");
+               return NULL;
+       }
+
+       return pf->vmdq[vmdq_qidx / pf->vmdq_nb_qps].vsi;
+}
+
+/*
+ * Translate an application queue index into the queue's offset inside the
+ * VSI that owns it.
+ *
+ * Indexes within the MAIN VSI range are returned unchanged. Larger indexes
+ * are rebased past the MAIN VSI queues and wrapped by the per-VSI VMDQ
+ * queue count. When no VMDQ VSI is configured an error is logged and
+ * (uint16_t)(-1) is returned.
+ */
+static uint16_t
+i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
+{
+       uint16_t vmdq_qidx;
+
+       /* MAIN VSI queues keep their index as the offset */
+       if (queue_idx < pf->main_vsi->nb_qps)
+               return queue_idx;
+
+       /* Past the MAIN VSI only VMDQ queues exist; they must be configured */
+       if (!pf->nb_cfg_vmdq_vsi) {
+               PMD_INIT_LOG(ERR, "Fail to get queue offset");
+               return (uint16_t)(-1);
+       }
+
+       vmdq_qidx = queue_idx - pf->main_vsi->nb_qps;
+
+       return vmdq_qidx % pf->vmdq_nb_qps;
+}
+
 int
 i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-       struct i40e_vsi *vsi = I40E_DEV_PRIVATE_TO_VSI(dev->data->dev_private);
        struct i40e_rx_queue *rxq;
        int err = -1;
-       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-       uint16_t q_base = vsi->base_queue;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1468,7 +1610,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
                /* Init the RX tail regieter. */
                I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
 
-               err = i40e_switch_rx_queue(hw, rx_queue_id + q_base, TRUE);
+               err = i40e_switch_rx_queue(hw, rxq->reg_idx, TRUE);
 
                if (err) {
                        PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
@@ -1485,16 +1627,18 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-       struct i40e_vsi *vsi = I40E_DEV_PRIVATE_TO_VSI(dev->data->dev_private);
        struct i40e_rx_queue *rxq;
        int err;
-       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-       uint16_t q_base = vsi->base_queue;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];
 
-               err = i40e_switch_rx_queue(hw, rx_queue_id + q_base, FALSE);
+               /*
+               * rx_queue_id is queue id application refers to, while
+               * rxq->reg_idx is the real queue index.
+               */
+               err = i40e_switch_rx_queue(hw, rxq->reg_idx, FALSE);
 
                if (err) {
                        PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
@@ -1511,15 +1655,20 @@ i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 int
 i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
-       struct i40e_vsi *vsi = I40E_DEV_PRIVATE_TO_VSI(dev->data->dev_private);
        int err = -1;
-       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-       uint16_t q_base = vsi->base_queue;
+       struct i40e_tx_queue *txq;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        PMD_INIT_FUNC_TRACE();
 
        if (tx_queue_id < dev->data->nb_tx_queues) {
-               err = i40e_switch_tx_queue(hw, tx_queue_id + q_base, TRUE);
+               txq = dev->data->tx_queues[tx_queue_id];
+
+               /*
+               * tx_queue_id is queue id application refers to, while
+               * txq->reg_idx is the real queue index.
+               */
+               err = i40e_switch_tx_queue(hw, txq->reg_idx, TRUE);
                if (err)
                        PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
                                    tx_queue_id);
@@ -1531,16 +1680,18 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 int
 i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
-       struct i40e_vsi *vsi = I40E_DEV_PRIVATE_TO_VSI(dev->data->dev_private);
        struct i40e_tx_queue *txq;
        int err;
-       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-       uint16_t q_base = vsi->base_queue;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        if (tx_queue_id < dev->data->nb_tx_queues) {
                txq = dev->data->tx_queues[tx_queue_id];
 
-               err = i40e_switch_tx_queue(hw, tx_queue_id + q_base, FALSE);
+               /*
+               * tx_queue_id is queue id application refers to, while
+               * txq->reg_idx is the real queue index.
+               */
+               err = i40e_switch_tx_queue(hw, txq->reg_idx, FALSE);
 
                if (err) {
                        PMD_DRV_LOG(ERR, "Failed to switch TX queue %u of",
@@ -1563,14 +1714,23 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
 {
-       struct i40e_vsi *vsi = I40E_DEV_PRIVATE_TO_VSI(dev->data->dev_private);
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_rx_queue *rxq;
        const struct rte_memzone *rz;
        uint32_t ring_size;
        uint16_t len;
        int use_def_burst_func = 1;
 
-       if (!vsi || queue_idx >= vsi->nb_qps) {
+       if (hw->mac.type == I40E_MAC_VF) {
+               struct i40e_vf *vf =
+                       I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+               vsi = &vf->vsi;
+       } else
+               vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
+
+       if (vsi == NULL) {
                PMD_DRV_LOG(ERR, "VSI not available or queue "
                            "index exceeds the maximum");
                return I40E_ERR_PARAM;
@@ -1592,7 +1752,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        /* Allocate the rx queue data structure */
        rxq = rte_zmalloc_socket("i40e rx queue",
                                 sizeof(struct i40e_rx_queue),
-                                CACHE_LINE_SIZE,
+                                RTE_CACHE_LINE_SIZE,
                                 socket_id);
        if (!rxq) {
                PMD_DRV_LOG(ERR, "Failed to allocate memory for "
@@ -1603,7 +1763,12 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->nb_rx_desc = nb_desc;
        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
        rxq->queue_id = queue_idx;
-       rxq->reg_idx = vsi->base_queue + queue_idx;
+       if (hw->mac.type == I40E_MAC_VF)
+               rxq->reg_idx = queue_idx;
+       else /* PF device */
+               rxq->reg_idx = vsi->base_queue +
+                       i40e_get_queue_offset_by_qindex(pf, queue_idx);
+
        rxq->port_id = dev->data->port_id;
        rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
                                                        0 : ETHER_CRC_LEN);
@@ -1646,7 +1811,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->sw_ring =
                rte_zmalloc_socket("i40e rx sw ring",
                                   sizeof(struct i40e_rx_entry) * len,
-                                  CACHE_LINE_SIZE,
+                                  RTE_CACHE_LINE_SIZE,
                                   socket_id);
        if (!rxq->sw_ring) {
                i40e_dev_rx_queue_release(rxq);
@@ -1761,13 +1926,22 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        unsigned int socket_id,
                        const struct rte_eth_txconf *tx_conf)
 {
-       struct i40e_vsi *vsi = I40E_DEV_PRIVATE_TO_VSI(dev->data->dev_private);
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_tx_queue *txq;
        const struct rte_memzone *tz;
        uint32_t ring_size;
        uint16_t tx_rs_thresh, tx_free_thresh;
 
-       if (!vsi || queue_idx >= vsi->nb_qps) {
+       if (hw->mac.type == I40E_MAC_VF) {
+               struct i40e_vf *vf =
+                       I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+               vsi = &vf->vsi;
+       } else
+               vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
+
+       if (vsi == NULL) {
                PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) "
                            "exceeds the maximum", queue_idx);
                return I40E_ERR_PARAM;
@@ -1862,7 +2036,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        /* Allocate the TX queue data structure. */
        txq = rte_zmalloc_socket("i40e tx queue",
                                  sizeof(struct i40e_tx_queue),
-                                 CACHE_LINE_SIZE,
+                                 RTE_CACHE_LINE_SIZE,
                                  socket_id);
        if (!txq) {
                PMD_DRV_LOG(ERR, "Failed to allocate memory for "
@@ -1891,7 +2065,12 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        txq->hthresh = tx_conf->tx_thresh.hthresh;
        txq->wthresh = tx_conf->tx_thresh.wthresh;
        txq->queue_id = queue_idx;
-       txq->reg_idx = vsi->base_queue + queue_idx;
+       if (hw->mac.type == I40E_MAC_VF)
+               txq->reg_idx = queue_idx;
+       else /* PF device */
+               txq->reg_idx = vsi->base_queue +
+                       i40e_get_queue_offset_by_qindex(pf, queue_idx);
+
        txq->port_id = dev->data->port_id;
        txq->txq_flags = tx_conf->txq_flags;
        txq->vsi = vsi;
@@ -1908,7 +2087,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        txq->sw_ring =
                rte_zmalloc_socket("i40e tx sw ring",
                                   sizeof(struct i40e_tx_entry) * nb_desc,
-                                  CACHE_LINE_SIZE,
+                                  RTE_CACHE_LINE_SIZE,
                                   socket_id);
        if (!txq->sw_ring) {
                i40e_dev_tx_queue_release(txq);
@@ -1974,6 +2153,24 @@ i40e_ring_dma_zone_reserve(struct rte_eth_dev *dev,
 #endif
 }
 
+/*
+ * Return the memzone called 'name', reserving it if it does not exist yet.
+ *
+ * An existing zone found by rte_memzone_lookup() is returned as is and
+ * 'len'/'socket_id' are not consulted. Otherwise a zone of 'len' bytes
+ * aligned to I40E_ALIGN is reserved on 'socket_id'; with
+ * RTE_LIBRTE_XEN_DOM0 the bounded variant is used with an RTE_PGSIZE_2M
+ * bound. The result of the reserve call is returned unchanged.
+ */
+const struct rte_memzone *
+i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
+{
+       const struct rte_memzone *found = rte_memzone_lookup(name);
+
+       if (found != NULL)
+               return found;
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+       return rte_memzone_reserve_bounded(name, len,
+               socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
+#else
+       return rte_memzone_reserve_aligned(name, len,
+                               socket_id, 0, I40E_ALIGN);
+#endif
+}
+
 void
 i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 {
@@ -2107,6 +2304,8 @@ i40e_tx_queue_init(struct i40e_tx_queue *txq)
        tx_ctx.base = txq->tx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
        tx_ctx.qlen = txq->nb_tx_desc;
        tx_ctx.rdylist = rte_le_to_cpu_16(vsi->info.qs_handle[0]);
+       if (vsi->type == I40E_VSI_FDIR)
+               tx_ctx.fd_ena = TRUE;
 
        err = i40e_clear_lan_tx_queue_context(hw, pf_q);
        if (err != I40E_SUCCESS) {
@@ -2323,3 +2522,127 @@ i40e_dev_clear_queues(struct rte_eth_dev *dev)
                i40e_reset_rx_queue(dev->data->rx_queues[i]);
        }
 }
+
+/* Ring sizes, in descriptors, of the flow director TX and RX queues */
+#define I40E_FDIR_NUM_TX_DESC  I40E_MIN_RING_DESC
+#define I40E_FDIR_NUM_RX_DESC  I40E_MIN_RING_DESC
+
+/*
+ * Allocate the TX queue structure and hardware descriptor ring used by the
+ * flow director VSI and record the queue in pf->fdir.txq.
+ *
+ * Returns I40E_SUCCESS on success, I40E_ERR_BAD_PTR when 'pf' is NULL and
+ * I40E_ERR_NO_MEMORY when the queue structure or the ring memory cannot be
+ * obtained.
+ */
+enum i40e_status_code
+i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
+{
+       struct i40e_tx_queue *txq;
+       const struct rte_memzone *tz = NULL;
+       uint32_t ring_size;
+       struct rte_eth_dev *dev;
+
+       if (!pf) {
+               PMD_DRV_LOG(ERR, "PF is not available");
+               return I40E_ERR_BAD_PTR;
+       }
+
+       /* pf may only be dereferenced once it is known to be non-NULL */
+       dev = pf->adapter->eth_dev;
+
+       /* Allocate the TX queue data structure. */
+       txq = rte_zmalloc_socket("i40e fdir tx queue",
+                                 sizeof(struct i40e_tx_queue),
+                                 RTE_CACHE_LINE_SIZE,
+                                 SOCKET_ID_ANY);
+       if (!txq) {
+               PMD_DRV_LOG(ERR, "Failed to allocate memory for "
+                                       "tx queue structure.");
+               return I40E_ERR_NO_MEMORY;
+       }
+
+       /* Allocate TX hardware ring descriptors. */
+       ring_size = sizeof(struct i40e_tx_desc) * I40E_FDIR_NUM_TX_DESC;
+       ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
+
+       tz = i40e_ring_dma_zone_reserve(dev,
+                                       "fdir_tx_ring",
+                                       I40E_FDIR_QUEUE_ID,
+                                       ring_size,
+                                       SOCKET_ID_ANY);
+       if (!tz) {
+               /* Do not leak the queue structure allocated above */
+               i40e_dev_tx_queue_release(txq);
+               PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX.");
+               return I40E_ERR_NO_MEMORY;
+       }
+
+       txq->nb_tx_desc = I40E_FDIR_NUM_TX_DESC;
+       txq->queue_id = I40E_FDIR_QUEUE_ID;
+       txq->reg_idx = pf->fdir.fdir_vsi->base_queue;
+       txq->vsi = pf->fdir.fdir_vsi;
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+       txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+#else
+       txq->tx_ring_phys_addr = (uint64_t)tz->phys_addr;
+#endif
+       txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
+       /*
+        * No software ring or reset is needed for the fdir programming
+        * queue; just mark the queue as configured.
+        */
+       txq->q_set = TRUE;
+       pf->fdir.txq = txq;
+
+       return I40E_SUCCESS;
+}
+
+/*
+ * Allocate the RX queue structure and hardware descriptor ring used by the
+ * flow director VSI and record the queue in pf->fdir.rxq.
+ *
+ * Returns I40E_SUCCESS on success, I40E_ERR_BAD_PTR when 'pf' is NULL and
+ * I40E_ERR_NO_MEMORY when the queue structure or the ring memory cannot be
+ * obtained.
+ */
+enum i40e_status_code
+i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
+{
+       struct i40e_rx_queue *rxq;
+       const struct rte_memzone *rz = NULL;
+       uint32_t ring_size;
+       struct rte_eth_dev *dev;
+
+       if (!pf) {
+               PMD_DRV_LOG(ERR, "PF is not available");
+               return I40E_ERR_BAD_PTR;
+       }
+
+       /* pf may only be dereferenced once it is known to be non-NULL */
+       dev = pf->adapter->eth_dev;
+
+       /* Allocate the RX queue data structure. */
+       rxq = rte_zmalloc_socket("i40e fdir rx queue",
+                                 sizeof(struct i40e_rx_queue),
+                                 RTE_CACHE_LINE_SIZE,
+                                 SOCKET_ID_ANY);
+       if (!rxq) {
+               PMD_DRV_LOG(ERR, "Failed to allocate memory for "
+                                       "rx queue structure.");
+               return I40E_ERR_NO_MEMORY;
+       }
+
+       /* Allocate RX hardware ring descriptors. */
+       ring_size = sizeof(union i40e_rx_desc) * I40E_FDIR_NUM_RX_DESC;
+       ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
+
+       rz = i40e_ring_dma_zone_reserve(dev,
+                                       "fdir_rx_ring",
+                                       I40E_FDIR_QUEUE_ID,
+                                       ring_size,
+                                       SOCKET_ID_ANY);
+       if (!rz) {
+               /* Do not leak the queue structure allocated above */
+               i40e_dev_rx_queue_release(rxq);
+               PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for RX.");
+               return I40E_ERR_NO_MEMORY;
+       }
+
+       rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
+       rxq->queue_id = I40E_FDIR_QUEUE_ID;
+       rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
+       rxq->vsi = pf->fdir.fdir_vsi;
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+       rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#else
+       rxq->rx_ring_phys_addr = (uint64_t)rz->phys_addr;
+#endif
+       rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
+
+       /*
+        * No software ring or reset is needed for the fdir rx queue;
+        * just mark the queue as configured.
+        */
+       rxq->q_set = TRUE;
+       pf->fdir.rxq = rxq;
+
+       return I40E_SUCCESS;
+}