i40e: fix alignment of HW descriptors number
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index fd656d5..58aec9f 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -57,9 +57,6 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 
-#define I40E_MIN_RING_DESC     64
-#define I40E_MAX_RING_DESC     4096
-#define I40E_ALIGN             128
 #define DEFAULT_TX_RS_THRESH   32
 #define DEFAULT_TX_FREE_THRESH 32
 #define I40E_MAX_PKT_TYPE      256
@@ -68,6 +65,9 @@
 
 #define I40E_DMA_MEM_ALIGN 4096
 
+/* Base address of the HW descriptor ring should be 128B aligned. */
+#define I40E_RING_BASE_ALIGN   128
+
 #define I40E_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
                                        ETH_TXQ_FLAGS_NOOFFLOADS)
 
@@ -1788,9 +1788,6 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
        return txq->tx_rs_thresh;
 }
 
-#define I40E_TD_CMD (I40E_TX_DESC_CMD_ICRC |\
-                    I40E_TX_DESC_CMD_EOP)
-
 /* Populate 4 descriptors with data from 4 mbufs */
 static inline void
 tx4(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
@@ -2108,10 +2105,13 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        struct i40e_vsi *vsi;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct i40e_rx_queue *rxq;
        const struct rte_memzone *rz;
        uint32_t ring_size;
-       uint16_t len;
+       uint16_t len, i;
+       uint16_t base, bsf, tc_mapping;
        int use_def_burst_func = 1;
 
        if (hw->mac.type == I40E_MAC_VF) {
@@ -2126,9 +2126,9 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
                            "index exceeds the maximum");
                return I40E_ERR_PARAM;
        }
-       if (((nb_desc * sizeof(union i40e_rx_desc)) % I40E_ALIGN) != 0 ||
-                                       (nb_desc > I40E_MAX_RING_DESC) ||
-                                       (nb_desc < I40E_MIN_RING_DESC)) {
+       if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
+                       (nb_desc > I40E_MAX_RING_DESC) ||
+                       (nb_desc < I40E_MIN_RING_DESC)) {
                PMD_DRV_LOG(ERR, "Number (%u) of receive descriptors is "
                            "invalid", nb_desc);
                return I40E_ERR_PARAM;
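
A minimal standalone sketch of the new descriptor-count check (illustrative only: the 32 used below is an assumed value of I40E_ALIGN_RING_DESC, whose definition is not part of this hunk and presumably moves to i40e_rxtx.h; the min/max bounds match the defines removed above):

/* Illustrative re-statement of the nb_desc validation above. */
#include <stdbool.h>
#include <stdint.h>

#define EX_MIN_RING_DESC   64	/* matches removed I40E_MIN_RING_DESC */
#define EX_MAX_RING_DESC   4096	/* matches removed I40E_MAX_RING_DESC */
#define EX_ALIGN_RING_DESC 32	/* assumed value of I40E_ALIGN_RING_DESC */

static bool
ring_desc_count_is_valid(uint16_t nb_desc)
{
	return (nb_desc % EX_ALIGN_RING_DESC == 0) &&
	       (nb_desc >= EX_MIN_RING_DESC) &&
	       (nb_desc <= EX_MAX_RING_DESC);
}

/* e.g. 512 and 1024 pass; 100 fails the multiple-of-32 test and
 * 8192 fails the upper bound. */
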
@@ -2216,13 +2216,12 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
        use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
 
-       if (!use_def_burst_func && !dev->data->scattered_rx) {
+       if (!use_def_burst_func) {
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                             "satisfied. Rx Burst Bulk Alloc function will be "
                             "used on port=%d, queue=%d.",
                             rxq->port_id, rxq->queue_id);
-               dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
 #endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
        } else {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
@@ -2230,6 +2229,20 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
                             "or RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC is "
                             "not enabled on port=%d, queue=%d.",
                             rxq->port_id, rxq->queue_id);
+               ad->rx_bulk_alloc_allowed = false;
+       }
+
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (!(vsi->enabled_tc & (1 << i)))
+                       continue;
+               tc_mapping = rte_le_to_cpu_16(vsi->info.tc_mapping[i]);
+               base = (tc_mapping & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) >>
+                       I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT;
+               bsf = (tc_mapping & I40E_AQ_VSI_TC_QUE_NUMBER_MASK) >>
+                       I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT;
+
+               if (queue_idx >= base && queue_idx < (base + BIT(bsf)))
+                       rxq->dcb_tc = i;
        }
 
        return 0;
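
A minimal sketch of what the tc_mapping decode in the loop above does: each enabled traffic class advertises a queue offset and a power-of-two queue count packed into one 16-bit word. The mask/shift values below are assumptions (they come from the shared base-driver admin-queue headers, not from this file):

/* Illustrative only: decode one tc_mapping word the way the loop above does. */
#include <stdint.h>

#define EX_TC_QUE_OFFSET_SHIFT	0		/* assumed field layout */
#define EX_TC_QUE_OFFSET_MASK	(0x1FF << EX_TC_QUE_OFFSET_SHIFT)
#define EX_TC_QUE_NUMBER_SHIFT	9
#define EX_TC_QUE_NUMBER_MASK	(0x7 << EX_TC_QUE_NUMBER_SHIFT)

/* Returns 1 when queue_idx falls inside this TC's queue range. */
static int
queue_belongs_to_tc(uint16_t tc_mapping, uint16_t queue_idx)
{
	uint16_t base = (tc_mapping & EX_TC_QUE_OFFSET_MASK) >>
			EX_TC_QUE_OFFSET_SHIFT;
	uint16_t bsf = (tc_mapping & EX_TC_QUE_NUMBER_MASK) >>
			EX_TC_QUE_NUMBER_SHIFT;

	/* bsf is an exponent: the TC owns 1 << bsf queues starting at base. */
	return queue_idx >= base && queue_idx < (uint16_t)(base + (1u << bsf));
}

The same decode is repeated for the Tx queue further down, so rxq->dcb_tc and txq->dcb_tc end up holding the traffic class that owns the queue.
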
@@ -2324,6 +2337,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        const struct rte_memzone *tz;
        uint32_t ring_size;
        uint16_t tx_rs_thresh, tx_free_thresh;
+       uint16_t i, base, bsf, tc_mapping;
 
        if (hw->mac.type == I40E_MAC_VF) {
                struct i40e_vf *vf =
@@ -2338,9 +2352,9 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
                return I40E_ERR_PARAM;
        }
 
-       if (((nb_desc * sizeof(struct i40e_tx_desc)) % I40E_ALIGN) != 0 ||
-                                       (nb_desc > I40E_MAX_RING_DESC) ||
-                                       (nb_desc < I40E_MIN_RING_DESC)) {
+       if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
+                       (nb_desc > I40E_MAX_RING_DESC) ||
+                       (nb_desc < I40E_MIN_RING_DESC)) {
                PMD_DRV_LOG(ERR, "Number (%u) of transmit descriptors is "
                            "invalid", nb_desc);
                return I40E_ERR_PARAM;
@@ -2491,13 +2505,19 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        dev->data->tx_queues[queue_idx] = txq;
 
        /* Use a simple TX queue without offloads or multi segs if possible */
-       if (((txq->txq_flags & I40E_SIMPLE_FLAGS) == I40E_SIMPLE_FLAGS) &&
-                               (txq->tx_rs_thresh >= I40E_TX_MAX_BURST)) {
-               PMD_INIT_LOG(INFO, "Using simple tx path");
-               dev->tx_pkt_burst = i40e_xmit_pkts_simple;
-       } else {
-               PMD_INIT_LOG(INFO, "Using full-featured tx path");
-               dev->tx_pkt_burst = i40e_xmit_pkts;
+       i40e_set_tx_function_flag(dev, txq);
+
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (!(vsi->enabled_tc & (1 << i)))
+                       continue;
+               tc_mapping = rte_le_to_cpu_16(vsi->info.tc_mapping[i]);
+               base = (tc_mapping & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) >>
+                       I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT;
+               bsf = (tc_mapping & I40E_AQ_VSI_TC_QUE_NUMBER_MASK) >>
+                       I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT;
+
+               if (queue_idx >= base && queue_idx < (base + BIT(bsf)))
+                       txq->dcb_tc = i;
        }
 
        return 0;
@@ -2537,10 +2557,10 @@ i40e_ring_dma_zone_reserve(struct rte_eth_dev *dev,
 
 #ifdef RTE_LIBRTE_XEN_DOM0
        return rte_memzone_reserve_bounded(z_name, ring_size,
-               socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
+               socket_id, 0, I40E_RING_BASE_ALIGN, RTE_PGSIZE_2M);
 #else
        return rte_memzone_reserve_aligned(z_name, ring_size,
-                               socket_id, 0, I40E_ALIGN);
+                               socket_id, 0, I40E_RING_BASE_ALIGN);
 #endif
 }
 
@@ -2554,10 +2574,10 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
                return mz;
 #ifdef RTE_LIBRTE_XEN_DOM0
        mz = rte_memzone_reserve_bounded(name, len,
-               socket_id, 0, I40E_ALIGN, RTE_PGSIZE_2M);
+               socket_id, 0, I40E_RING_BASE_ALIGN, RTE_PGSIZE_2M);
 #else
        mz = rte_memzone_reserve_aligned(name, len,
-                               socket_id, 0, I40E_ALIGN);
+                               socket_id, 0, I40E_RING_BASE_ALIGN);
 #endif
        return mz;
 }
@@ -2567,6 +2587,12 @@ i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
 {
        uint16_t i;
 
+       /* SSE Vector driver has a different way of releasing mbufs. */
+       if (rxq && rxq->rx_using_sse) {
+               i40e_rx_queue_release_mbufs_vec(rxq);
+               return;
+       }
+
        if (!rxq || !rxq->sw_ring) {
                PMD_DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL");
                return;
@@ -2625,6 +2651,9 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
        rxq->nb_rx_hold = 0;
        rxq->pkt_first_seg = NULL;
        rxq->pkt_last_seg = NULL;
+
+       rxq->rxrearm_start = 0;
+       rxq->rxrearm_nb = 0;
 }
 
 void
@@ -2703,7 +2732,7 @@ i40e_tx_queue_init(struct i40e_tx_queue *txq)
 #ifdef RTE_LIBRTE_IEEE1588
        tx_ctx.timesync_ena = 1;
 #endif
-       tx_ctx.rdylist = rte_le_to_cpu_16(vsi->info.qs_handle[0]);
+       tx_ctx.rdylist = rte_le_to_cpu_16(vsi->info.qs_handle[txq->dcb_tc]);
        if (vsi->type == I40E_VSI_FDIR)
                tx_ctx.fd_ena = TRUE;
 
@@ -2837,7 +2866,6 @@ i40e_rx_queue_init(struct i40e_rx_queue *rxq)
        int err = I40E_SUCCESS;
        struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
        struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
-       struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
        uint16_t pf_q = rxq->reg_idx;
        uint16_t buf_size;
        struct i40e_hmc_obj_rxq rx_ctx;
@@ -2893,7 +2921,6 @@ i40e_rx_queue_init(struct i40e_rx_queue *rxq)
        /* Check if scattered RX needs to be used. */
        if ((rxq->max_pkt_len + 2 * I40E_VLAN_TAG_SIZE) > buf_size) {
                dev_data->scattered_rx = 1;
-               dev->rx_pkt_burst = i40e_recv_scattered_pkts;
        }
 
        /* Init the RX tail register. */
@@ -3063,3 +3090,201 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
 
        return I40E_SUCCESS;
 }
+
+void __attribute__((cold))
+i40e_set_rx_function(struct rte_eth_dev *dev)
+{
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       uint16_t rx_using_sse, i;
+       /* In order to allow Vector Rx there are a few configuration
+        * conditions to be met and Rx Bulk Allocation should be allowed.
+        */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               if (i40e_rx_vec_dev_conf_condition_check(dev) ||
+                   !ad->rx_bulk_alloc_allowed) {
+                       PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet"
+                                    " Vector Rx preconditions",
+                                    dev->data->port_id);
+
+                       ad->rx_vec_allowed = false;
+               }
+               if (ad->rx_vec_allowed) {
+                       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                               struct i40e_rx_queue *rxq =
+                                       dev->data->rx_queues[i];
+
+                               if (i40e_rxq_vec_setup(rxq)) {
+                                       ad->rx_vec_allowed = false;
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       if (dev->data->scattered_rx) {
+               /* Set the non-LRO scattered callback: there are Vector and
+                * single allocation versions.
+                */
+               if (ad->rx_vec_allowed) {
+                       PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
+                                           "callback (port=%d).",
+                                    dev->data->port_id);
+
+                       dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
+               } else {
+                       PMD_INIT_LOG(DEBUG, "Using a Scattered Rx "
+                                          "callback (port=%d).",
+                                    dev->data->port_id);
+                       dev->rx_pkt_burst = i40e_recv_scattered_pkts;
+               }
+       /* If parameters allow we are going to choose between the following
+        * callbacks:
+        *    - Vector
+        *    - Bulk Allocation
+        *    - Single buffer allocation (the simplest one)
+        */
+       } else if (ad->rx_vec_allowed) {
+               PMD_INIT_LOG(DEBUG, "Vector Rx enabled; make sure the Rx "
+                                   "burst size is no less than %d (port=%d).",
+                            RTE_I40E_DESCS_PER_LOOP,
+                            dev->data->port_id);
+
+               dev->rx_pkt_burst = i40e_recv_pkts_vec;
+       } else if (ad->rx_bulk_alloc_allowed) {
+               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
+                                   "satisfied. Rx Burst Bulk Alloc function "
+                                   "will be used on port=%d.",
+                            dev->data->port_id);
+
+               dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
+       } else {
+               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
+                                   "satisfied; using the default Rx callback "
+                                   "(port=%d).",
+                            dev->data->port_id);
+
+               dev->rx_pkt_burst = i40e_recv_pkts;
+       }
+
+       /* Propagate information about RX function choice through all queues. */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               rx_using_sse =
+                       (dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
+                        dev->rx_pkt_burst == i40e_recv_pkts_vec);
+
+               for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                       struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+
+                       rxq->rx_using_sse = rx_using_sse;
+               }
+       }
+}
+
+void __attribute__((cold))
+i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
+{
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+       /* Use a simple Tx queue (no offloads, no multi segs) if possible */
+       if (((txq->txq_flags & I40E_SIMPLE_FLAGS) == I40E_SIMPLE_FLAGS)
+                       && (txq->tx_rs_thresh >= RTE_PMD_I40E_TX_MAX_BURST)) {
+               if (txq->tx_rs_thresh <= RTE_I40E_TX_MAX_FREE_BUF_SZ) {
+                       PMD_INIT_LOG(DEBUG, "Vector tx"
+                                    " can be enabled on this txq.");
+
+               } else {
+                       ad->tx_vec_allowed = false;
+               }
+       } else {
+               ad->tx_simple_allowed = false;
+       }
+}
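
An illustrative application-side Tx queue configuration that satisfies I40E_SIMPLE_FLAGS above, so i40e_set_tx_function_flag() leaves both the simple and the vector Tx paths eligible. The threshold bounds quoted in the comments (32 and 64) are assumed values of RTE_PMD_I40E_TX_MAX_BURST and RTE_I40E_TX_MAX_FREE_BUF_SZ, which are defined outside this diff:

/* Illustrative only. */
#include <rte_ethdev.h>

static const struct rte_eth_txconf simple_txconf = {
	/* No multi-segment packets, no offloads: matches I40E_SIMPLE_FLAGS. */
	.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
	/* >= RTE_PMD_I40E_TX_MAX_BURST (assumed 32) keeps the simple path,
	 * <= RTE_I40E_TX_MAX_FREE_BUF_SZ (assumed 64) keeps the vector path. */
	.tx_rs_thresh = 32,
	.tx_free_thresh = 32,
};

Passing this through rte_eth_tx_queue_setup() reaches the setup path above; i40e_set_tx_function() below then picks the vector, simple, or full-featured burst function from the accumulated flags.
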
+
+void __attribute__((cold))
+i40e_set_tx_function(struct rte_eth_dev *dev)
+{
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       int i;
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               if (ad->tx_vec_allowed) {
+                       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+                               struct i40e_tx_queue *txq =
+                                       dev->data->tx_queues[i];
+
+                               if (i40e_txq_vec_setup(txq)) {
+                                       ad->tx_vec_allowed = false;
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       if (ad->tx_simple_allowed) {
+               if (ad->tx_vec_allowed) {
+                       PMD_INIT_LOG(DEBUG, "Vector Tx path will be used.");
+                       dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+               } else {
+                       PMD_INIT_LOG(DEBUG, "Simple Tx path will be used.");
+                       dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+               }
+       } else {
+               PMD_INIT_LOG(DEBUG, "Full-featured Tx path will be used.");
+               dev->tx_pkt_burst = i40e_xmit_pkts;
+       }
+}
+
+/* Stubs needed for linkage when CONFIG_RTE_I40E_INC_VECTOR is set to 'n' */
+int __attribute__((weak))
+i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
+{
+       return -1;
+}
+
+uint16_t __attribute__((weak))
+i40e_recv_pkts_vec(
+       void __rte_unused *rx_queue,
+       struct rte_mbuf __rte_unused **rx_pkts,
+       uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
+
+uint16_t __attribute__((weak))
+i40e_recv_scattered_pkts_vec(
+       void __rte_unused *rx_queue,
+       struct rte_mbuf __rte_unused **rx_pkts,
+       uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
+
+int __attribute__((weak))
+i40e_rxq_vec_setup(struct i40e_rx_queue __rte_unused *rxq)
+{
+       return -1;
+}
+
+int __attribute__((weak))
+i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
+{
+       return -1;
+}
+
+void __attribute__((weak))
+i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue __rte_unused *rxq)
+{
+       return;
+}
+
+uint16_t __attribute__((weak))
+i40e_xmit_pkts_vec(void __rte_unused *tx_queue,
+                  struct rte_mbuf __rte_unused **tx_pkts,
+                  uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
+
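
The weak definitions above follow a standard GCC linkage pattern: when CONFIG_RTE_I40E_INC_VECTOR is disabled, these stubs satisfy the linker and report "not available" (-1, or 0 packets), so the selection logic in i40e_set_rx_function()/i40e_set_tx_function() falls back to the scalar paths; when the vector source file is compiled in, its strong definitions override the stubs. A minimal self-contained sketch of the pattern, with a hypothetical function name:

/* stub_demo.c -- illustrative only; feature_setup() is a made-up name. */
#include <stdio.h>

/* Link-time fallback: used only if no other object file provides a
 * strong definition of feature_setup(). */
int __attribute__((weak))
feature_setup(void)
{
	return -1;	/* "vector support not compiled in" */
}

int
main(void)
{
	if (feature_setup() < 0)
		printf("falling back to the scalar path\n");
	else
		printf("vector path available\n");
	return 0;
}

Linking in another object that defines a strong feature_setup() silently replaces the weak one, which is how the driver's vector Rx/Tx object replaces these stubs when the vector option is enabled.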