net/txgbe: support priority flow control
author Jiawen Wu <jiawenwu@trustnetic.com>
Mon, 19 Oct 2020 08:54:03 +0000 (16:54 +0800)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 3 Nov 2020 22:24:27 +0000 (23:24 +0100)
Add priority flow control support.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
drivers/net/txgbe/base/txgbe_dcb.c
drivers/net/txgbe/base/txgbe_dcb.h
drivers/net/txgbe/base/txgbe_dcb_hw.c
drivers/net/txgbe/txgbe_ethdev.c
drivers/net/txgbe/txgbe_rxtx.c

index da6a3a7..7e9a16c 100644 (file)
@@ -7,6 +7,146 @@
 #include "txgbe_dcb.h"
 #include "txgbe_dcb_hw.h"
 
+/**
+ *  txgbe_dcb_pfc_enable - Enable priority flow control for a traffic class
+ *  @hw: pointer to hardware structure
+ *  @tc_num: traffic class number
+ *  Returns 0 on success, TXGBE_ERR_* if the hw->fc settings are rejected.
+ */
+int
+txgbe_dcb_pfc_enable(struct txgbe_hw *hw, uint8_t tc_num)
+{
+       int ret_val = 0;
+       uint32_t mflcn_reg, fccfg_reg;
+       uint32_t pause_time;
+       uint32_t fcrtl, fcrth;
+       uint8_t i;
+       uint8_t nb_rx_en;
+
+       /* A zero pause time is rejected as an invalid configuration */
+       if (!hw->fc.pause_time) {
+               ret_val = TXGBE_ERR_INVALID_LINK_SETTINGS;
+               goto out;
+       }
+
+       /* Water marks are only validated when the mode includes Tx pause */
+       if (hw->fc.current_mode & txgbe_fc_tx_pause) {
+                /* Neither mark may be 0; zero low water causes XOFF floods */
+               if (!hw->fc.high_water[tc_num] ||
+                   !hw->fc.low_water[tc_num]) {
+                       PMD_INIT_LOG(ERR, "Invalid water mark configuration");
+                       ret_val = TXGBE_ERR_INVALID_LINK_SETTINGS;
+                       goto out;
+               }
+
+               if (hw->fc.low_water[tc_num] >= hw->fc.high_water[tc_num]) {
+                       PMD_INIT_LOG(ERR, "Invalid water mark configuration");
+                       ret_val = TXGBE_ERR_INVALID_LINK_SETTINGS;
+                       goto out;
+               }
+       }
+       /* Negotiate the fc mode to use */
+       txgbe_fc_autoneg(hw);
+
+       /* Clear the FC and PFC enable bits in both register images */
+       mflcn_reg = rd32(hw, TXGBE_RXFCCFG);
+       mflcn_reg &= ~(TXGBE_RXFCCFG_FC | TXGBE_RXFCCFG_PFC);
+
+       fccfg_reg = rd32(hw, TXGBE_TXFCCFG);
+       fccfg_reg &= ~(TXGBE_TXFCCFG_FC | TXGBE_TXFCCFG_PFC);
+
+       switch (hw->fc.current_mode) {
+       case txgbe_fc_none:
+               /*
+                * If more than one TC has XOFF enabled in FCWTRHI,
+                * Tx pause can not be disabled, so Tx PFC stays set.
+                */
+               nb_rx_en = 0;
+               for (i = 0; i < TXGBE_DCB_TC_MAX; i++) {
+                       uint32_t reg = rd32(hw, TXGBE_FCWTRHI(i));
+                       if (reg & TXGBE_FCWTRHI_XOFF)
+                               nb_rx_en++;
+               }
+               if (nb_rx_en > 1)
+                       fccfg_reg |= TXGBE_TXFCCFG_PFC;
+               break;
+       case txgbe_fc_rx_pause:
+               /*
+                * Rx Flow control is enabled and Tx Flow control is
+                * disabled by software override. Since there really
+                * isn't a way to advertise that we are capable of RX
+                * Pause ONLY, we will advertise that we support both
+                * symmetric and asymmetric Rx PAUSE.  Later, we will
+                * disable the adapter's ability to send PAUSE frames.
+                */
+               mflcn_reg |= TXGBE_RXFCCFG_PFC;
+               /*
+                * If more than one TC has XOFF enabled in FCWTRHI,
+                * Tx pause can not be disabled, so Tx PFC stays set.
+                */
+               nb_rx_en = 0;
+               for (i = 0; i < TXGBE_DCB_TC_MAX; i++) {
+                       uint32_t reg = rd32(hw, TXGBE_FCWTRHI(i));
+                       if (reg & TXGBE_FCWTRHI_XOFF)
+                               nb_rx_en++;
+               }
+               if (nb_rx_en > 1)
+                       fccfg_reg |= TXGBE_TXFCCFG_PFC;
+               break;
+       case txgbe_fc_tx_pause:
+               /*
+                * Tx Flow control is enabled, and Rx Flow control is
+                * disabled by software override.
+                */
+               fccfg_reg |= TXGBE_TXFCCFG_PFC;
+               break;
+       case txgbe_fc_full:
+               /* Flow control (both Rx and Tx) is enabled by SW override. */
+               mflcn_reg |= TXGBE_RXFCCFG_PFC;
+               fccfg_reg |= TXGBE_TXFCCFG_PFC;
+               break;
+       default:
+               PMD_DRV_LOG(DEBUG, "Flow control param set incorrectly");
+               ret_val = TXGBE_ERR_CONFIG;
+               goto out;
+       }
+
+       /* Write the updated Rx/Tx flow control configuration back. */
+       wr32(hw, TXGBE_RXFCCFG, mflcn_reg);
+       wr32(hw, TXGBE_TXFCCFG, fccfg_reg);
+
+       /* Set up and enable Rx high/low water mark thresholds, enable XON. */
+       if ((hw->fc.current_mode & txgbe_fc_tx_pause) &&
+               hw->fc.high_water[tc_num]) {
+               fcrtl = TXGBE_FCWTRLO_TH(hw->fc.low_water[tc_num]) |
+                       TXGBE_FCWTRLO_XON;
+               fcrth = TXGBE_FCWTRHI_TH(hw->fc.high_water[tc_num]) |
+                       TXGBE_FCWTRHI_XOFF;
+       } else {
+               /*
+                * In order to prevent Tx hangs when the internal Tx
+                * switch is enabled we must set the high water mark
+                * to the Rx packet buffer size minus 32.  This allows
+                * the Tx switch to function even under heavy Rx workloads.
+                */
+               fcrtl = 0;
+               fcrth = rd32(hw, TXGBE_PBRXSIZE(tc_num)) - 32;
+       }
+       wr32(hw, TXGBE_FCWTRLO(tc_num), fcrtl);
+       wr32(hw, TXGBE_FCWTRHI(tc_num), fcrth);
+
+       /* Configure pause time (2 TCs per register) */
+       pause_time = TXGBE_RXFCFSH_TIME(hw->fc.pause_time);
+       for (i = 0; i < (TXGBE_DCB_TC_MAX / 2); i++)
+               wr32(hw, TXGBE_FCXOFFTM(i), pause_time * 0x00010001);
+
+       /* Refresh threshold is programmed as half of the pause time */
+       wr32(hw, TXGBE_RXFCRFSH, pause_time / 2);
+
+out:
+       return ret_val;
+}
+
 /**
  * txgbe_dcb_calculate_tc_credits_cee - Calculates traffic class credits
  * @hw: pointer to hardware structure
@@ -210,3 +350,11 @@ void txgbe_dcb_unpack_map_cee(struct txgbe_dcb_config *cfg, int direction,
                map[up] = txgbe_dcb_get_tc_from_up(cfg, direction, up);
 }
 
+/* Configure PFC by delegating to the raptor hardware implementation */
+s32 txgbe_dcb_config_pfc(struct txgbe_hw *hw, u8 pfc_en, u8 *map)
+{
+       int ret = TXGBE_ERR_PARAM;
+       ret = txgbe_dcb_config_pfc_raptor(hw, pfc_en, map);
+       return ret;
+}
+
index f08e030..cd87cf3 100644 (file)
@@ -92,6 +92,8 @@ struct txgbe_dcb_config {
        bool vt_mode;
 };
 
+int txgbe_dcb_pfc_enable(struct txgbe_hw *hw, u8 tc_num);
+
 /* DCB credits calculation */
 s32 txgbe_dcb_calculate_tc_credits_cee(struct txgbe_hw *hw,
                                   struct txgbe_dcb_config *dcb_config,
index 3c759c0..42742d0 100644 (file)
@@ -180,6 +180,79 @@ s32 txgbe_dcb_config_tx_data_arbiter_raptor(struct txgbe_hw *hw, u16 *refill,
        return 0;
 }
 
+/**
+ * txgbe_dcb_config_pfc_raptor - Configure priority flow control
+ * @hw: pointer to hardware structure
+ * @pfc_en: enabled pfc bitmask, one bit per user priority
+ * @map: priority to tc assignments indexed by priority
+ *
+ * Programs per-TC water marks, pause time and refresh; always returns 0.
+ */
+s32 txgbe_dcb_config_pfc_raptor(struct txgbe_hw *hw, u8 pfc_en, u8 *map)
+{
+       u32 i, j, fcrtl, reg;
+       u8 max_tc = 0;
+
+       /* Enable Transmit Priority Flow Control regardless of pfc_en */
+       wr32(hw, TXGBE_TXFCCFG, TXGBE_TXFCCFG_PFC);
+
+       /* Enable Receive Priority Flow Control only when pfc_en is non-zero */
+       wr32m(hw, TXGBE_RXFCCFG, TXGBE_RXFCCFG_PFC,
+               pfc_en ? TXGBE_RXFCCFG_PFC : 0);
+
+       for (i = 0; i < TXGBE_DCB_UP_MAX; i++) {
+               if (map[i] > max_tc)
+                       max_tc = map[i];
+       }
+
+       /* Configure water marks for each TC up to the highest mapped one */
+       for (i = 0; i <= max_tc; i++) {
+               int enabled = 0;
+
+               for (j = 0; j < TXGBE_DCB_UP_MAX; j++) {
+                       if (map[j] == i && (pfc_en & (1 << j))) {
+                               enabled = 1;
+                               break;
+                       }
+               }
+
+               if (enabled) {
+                       reg = TXGBE_FCWTRHI_TH(hw->fc.high_water[i]) |
+                             TXGBE_FCWTRHI_XOFF;
+                       fcrtl = TXGBE_FCWTRLO_TH(hw->fc.low_water[i]) |
+                               TXGBE_FCWTRLO_XON;
+                       wr32(hw, TXGBE_FCWTRLO(i), fcrtl);
+               } else {
+                       /*
+                        * In order to prevent Tx hangs when the internal Tx
+                        * switch is enabled we must set the high water mark
+                        * to the Rx packet buffer size - 24KB.  This allows
+                        * the Tx switch to function even under heavy Rx
+                        * workloads.
+                        */
+                       reg = rd32(hw, TXGBE_PBRXSIZE(i)) - 24576;
+                       wr32(hw, TXGBE_FCWTRLO(i), 0);
+               }
+
+               wr32(hw, TXGBE_FCWTRHI(i), reg);
+       }
+
+       for (; i < TXGBE_DCB_TC_MAX; i++) {
+               wr32(hw, TXGBE_FCWTRLO(i), 0);
+               wr32(hw, TXGBE_FCWTRHI(i), 0);
+       }
+
+       /* Same pause time in both 16-bit halves (2 TCs per register) */
+       reg = hw->fc.pause_time | (hw->fc.pause_time << 16);
+       for (i = 0; i < (TXGBE_DCB_TC_MAX / 2); i++)
+               wr32(hw, TXGBE_FCXOFFTM(i), reg);
+
+       /* Refresh threshold is programmed as half of the pause time */
+       wr32(hw, TXGBE_RXFCRFSH, hw->fc.pause_time / 2);
+
+       return 0;
+}
+
 /**
  * txgbe_dcb_config_tc_stats_raptor - Config traffic class statistics
  * @hw: pointer to hardware structure
index 51ca11c..bfdf82a 100644 (file)
@@ -2799,6 +2799,59 @@ txgbe_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        return -EIO;
 }
 
+static int
+txgbe_priority_flow_ctrl_set(struct rte_eth_dev *dev,
+               struct rte_eth_pfc_conf *pfc_conf)
+{
+       int err;
+       uint32_t rx_buf_size;
+       uint32_t max_high_water;
+       uint8_t tc_num;
+       uint8_t  map[TXGBE_DCB_UP_MAX] = { 0 };
+       struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+       struct txgbe_dcb_config *dcb_config = TXGBE_DEV_DCB_CONFIG(dev);
+
+       enum txgbe_fc_mode rte_fcmode_2_txgbe_fcmode[] = {
+               txgbe_fc_none,
+               txgbe_fc_rx_pause,
+               txgbe_fc_tx_pause,
+               txgbe_fc_full
+       };
+
+       PMD_INIT_FUNC_TRACE();
+
+       txgbe_dcb_unpack_map_cee(dcb_config, TXGBE_DCB_RX_CONFIG, map);
+       tc_num = map[pfc_conf->priority];
+       rx_buf_size = rd32(hw, TXGBE_PBRXSIZE(tc_num));
+       PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size);
+       /*
+        * Cap high_water (KB) one max-size Ethernet frame below the buffer.
+        * NOTE(review): priority and fc.mode are used as unchecked indexes.
+        */
+       max_high_water = (rx_buf_size - RTE_ETHER_MAX_LEN) >> 10;
+       if (pfc_conf->fc.high_water > max_high_water ||
+           pfc_conf->fc.high_water <= pfc_conf->fc.low_water) {
+               PMD_INIT_LOG(ERR, "Invalid high/low water setup value in KB");
+               PMD_INIT_LOG(ERR, "High_water must <= 0x%x", max_high_water);
+               return -EINVAL;
+       }
+
+       hw->fc.requested_mode = rte_fcmode_2_txgbe_fcmode[pfc_conf->fc.mode];
+       hw->fc.pause_time = pfc_conf->fc.pause_time;
+       hw->fc.send_xon = pfc_conf->fc.send_xon;
+       hw->fc.low_water[tc_num] =  pfc_conf->fc.low_water;
+       hw->fc.high_water[tc_num] = pfc_conf->fc.high_water;
+
+       err = txgbe_dcb_pfc_enable(hw, tc_num);
+
+       /* Success and "not negotiated" both return 0; anything else is -EIO */
+       if (err == 0 || err == TXGBE_ERR_FC_NOT_NEGOTIATED)
+               return 0;
+
+       PMD_INIT_LOG(ERR, "txgbe_dcb_pfc_enable = 0x%x", err);
+       return -EIO;
+}
+
 int
 txgbe_dev_rss_reta_update(struct rte_eth_dev *dev,
                          struct rte_eth_rss_reta_entry64 *reta_conf,
@@ -3286,6 +3339,7 @@ static const struct eth_dev_ops txgbe_eth_dev_ops = {
        .tx_queue_release           = txgbe_dev_tx_queue_release,
        .flow_ctrl_get              = txgbe_flow_ctrl_get,
        .flow_ctrl_set              = txgbe_flow_ctrl_set,
+       .priority_flow_ctrl_set     = txgbe_priority_flow_ctrl_set,
        .mac_addr_add               = txgbe_add_rar,
        .mac_addr_remove            = txgbe_remove_rar,
        .mac_addr_set               = txgbe_set_default_mac_addr,
index 5fadbb1..3b9dff3 100644 (file)
@@ -3125,7 +3125,7 @@ txgbe_dcb_hw_configure(struct rte_eth_dev *dev,
                        struct txgbe_dcb_config *dcb_config)
 {
        int     ret = 0;
-       uint8_t i, nb_tcs;
+       uint8_t i, pfc_en, nb_tcs;
        uint16_t pbsize, rx_buffer_size;
        uint8_t config_dcb_rx = 0;
        uint8_t config_dcb_tx = 0;
@@ -3299,6 +3299,26 @@ txgbe_dcb_hw_configure(struct rte_eth_dev *dev,
        /* Configure queue statistics registers */
        txgbe_dcb_config_tc_stats_raptor(hw, dcb_config);
 
+       /* Check if the PFC is supported */
+       if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
+               pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
+               for (i = 0; i < nb_tcs; i++) {
+                       /* If the TC count is 8,
+                        * and the default high_water is 48,
+                        * the low_water is 16 as default.
+                        */
+                       hw->fc.high_water[i] = (pbsize * 3) / 4;
+                       hw->fc.low_water[i] = pbsize / 4;
+                       /* Enable pfc for this TC */
+                       tc = &dcb_config->tc_config[i];
+                       tc->pfc = txgbe_dcb_pfc_enabled;
+               }
+               txgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
+               if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
+                       pfc_en &= 0x0F;
+               ret = txgbe_dcb_config_pfc(hw, pfc_en, map);
+       }
+
        return ret;
 }