net/mlx5: support flow aging
authorDong Zhou <dongz@mellanox.com>
Wed, 29 Apr 2020 02:25:09 +0000 (05:25 +0300)
committerFerruh Yigit <ferruh.yigit@intel.com>
Tue, 5 May 2020 13:54:27 +0000 (15:54 +0200)
Currently, there is no flow aging check or age-out event callback
mechanism in the mlx5 driver; this patch implements them. It includes:
- Splitting the current counter container into aged and no-aged
  containers in order to reduce memory consumption. The aged container
  allocates extra memory to save the aging parameters taken from the
  user configuration.
- An aging check and age-out event callback mechanism based on the
  current counters. When a flow is detected as aged-out, an
  RTE_ETH_EVENT_FLOW_AGED event is triggered to the applications.
- Implementation of the new API rte_flow_get_aged_flows; applications
  can use this API to retrieve the aged-out flows.

Signed-off-by: Dong Zhou <dongz@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
doc/guides/rel_notes/release_20_05.rst
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_flow.c
drivers/net/mlx5/mlx5_flow.h
drivers/net/mlx5/mlx5_flow_dv.c
drivers/net/mlx5/mlx5_flow_verbs.c

index c287cb4..42e69ce 100644 (file)
@@ -141,6 +141,7 @@ New Features
   * Added support for creating Relaxed Ordering Memory Regions.
   * Added support for jumbo frame size (9K MTU) in Multi-Packet RQ mode.
   * Optimized the memory consumption of flow.
+  * Added support for flow aging based on hardware counter.
 
 * **Updated the AESNI MB crypto PMD.**
 
index dbe22d6..4f704cb 100644 (file)
@@ -437,6 +437,27 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
        return 0;
 }
 
+/**
+ * Initialize the shared aging list information per port.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_ibv_shared object.
+ */
+static void
+mlx5_flow_aging_init(struct mlx5_ibv_shared *sh)
+{
+       uint32_t i;
+       struct mlx5_age_info *age_info;
+
+       for (i = 0; i < sh->max_port; i++) {
+               age_info = &sh->port[i].age_info;
+               age_info->flags = 0;
+               TAILQ_INIT(&age_info->aged_counters);
+               rte_spinlock_init(&age_info->aged_sl);
+               MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
+       }
+}
+
 /**
  * Initialize the counters management structure.
  *
@@ -446,11 +467,14 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
 static void
 mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
 {
-       uint8_t i;
+       uint8_t i, age;
 
+       sh->cmng.age = 0;
        TAILQ_INIT(&sh->cmng.flow_counters);
-       for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
-               TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
+       for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+               for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
+                       TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
+       }
 }
 
 /**
@@ -480,7 +504,7 @@ static void
 mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
 {
        struct mlx5_counter_stats_mem_mng *mng;
-       uint8_t i;
+       uint8_t i, age = 0;
        int j;
        int retries = 1024;
 
@@ -491,36 +515,42 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
                        break;
                rte_pause();
        }
-       for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
-               struct mlx5_flow_counter_pool *pool;
-               uint32_t batch = !!(i % 2);
 
-               if (!sh->cmng.ccont[i].pools)
-                       continue;
-               pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
-               while (pool) {
-                       if (batch) {
-                               if (pool->min_dcs)
-                                       claim_zero
-                                       (mlx5_devx_cmd_destroy(pool->min_dcs));
-                       }
-                       for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
-                               if (MLX5_POOL_GET_CNT(pool, j)->action)
-                                       claim_zero
-                                       (mlx5_glue->destroy_flow_action
-                                        (MLX5_POOL_GET_CNT(pool, j)->action));
-                               if (!batch && MLX5_GET_POOL_CNT_EXT
-                                   (pool, j)->dcs)
-                                       claim_zero(mlx5_devx_cmd_destroy
-                                                 (MLX5_GET_POOL_CNT_EXT
-                                                 (pool, j)->dcs));
+       for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+               for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
+                       struct mlx5_flow_counter_pool *pool;
+                       uint32_t batch = !!(i % 2);
+
+                       if (!sh->cmng.ccont[i][age].pools)
+                               continue;
+                       pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
+                       while (pool) {
+                               if (batch) {
+                                       if (pool->min_dcs)
+                                               claim_zero
+                                               (mlx5_devx_cmd_destroy
+                                               (pool->min_dcs));
+                               }
+                               for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
+                                       if (MLX5_POOL_GET_CNT(pool, j)->action)
+                                               claim_zero
+                                               (mlx5_glue->destroy_flow_action
+                                                (MLX5_POOL_GET_CNT
+                                                 (pool, j)->action));
+                                       if (!batch && MLX5_GET_POOL_CNT_EXT
+                                           (pool, j)->dcs)
+                                               claim_zero(mlx5_devx_cmd_destroy
+                                                         (MLX5_GET_POOL_CNT_EXT
+                                                         (pool, j)->dcs));
+                               }
+                               TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
+                                       pool, next);
+                               rte_free(pool);
+                               pool = TAILQ_FIRST
+                                       (&sh->cmng.ccont[i][age].pool_list);
                        }
-                       TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
-                                    next);
-                       rte_free(pool);
-                       pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+                       rte_free(sh->cmng.ccont[i][age].pools);
                }
-               rte_free(sh->cmng.ccont[i].pools);
        }
        mng = LIST_FIRST(&sh->cmng.mem_mngs);
        while (mng) {
@@ -788,6 +818,7 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
                err = rte_errno;
                goto error;
        }
+       mlx5_flow_aging_init(sh);
        mlx5_flow_counters_mng_init(sh);
        mlx5_flow_ipool_create(sh, config);
        /* Add device to memory callback list. */
index 4d9984f..1740d4a 100644 (file)
@@ -222,13 +222,22 @@ struct mlx5_drop {
 #define MLX5_COUNTERS_PER_POOL 512
 #define MLX5_MAX_PENDING_QUERIES 4
 #define MLX5_CNT_CONTAINER_RESIZE 64
+#define MLX5_CNT_AGE_OFFSET 0x80000000
 #define CNT_SIZE (sizeof(struct mlx5_flow_counter))
 #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
+#define AGE_SIZE (sizeof(struct mlx5_age_param))
+#define MLX5_AGING_TIME_DELAY  7
 
 #define CNT_POOL_TYPE_EXT      (1 << 0)
+#define CNT_POOL_TYPE_AGE      (1 << 1)
 #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
+#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
+#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
+
 #define MLX5_CNT_LEN(pool) \
-       (CNT_SIZE + (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
+       (CNT_SIZE + \
+       (IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
+       (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
 #define MLX5_POOL_GET_CNT(pool, index) \
        ((struct mlx5_flow_counter *) \
        ((uint8_t *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool))))
@@ -243,13 +252,33 @@ struct mlx5_drop {
  */
 #define MLX5_MAKE_CNT_IDX(pi, offset) \
        ((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1)
-#define MLX5_CNT_TO_CNT_EXT(cnt) \
-       ((struct mlx5_flow_counter_ext *)((cnt) + 1))
+#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \
+       ((struct mlx5_flow_counter_ext *)\
+       ((uint8_t *)((cnt) + 1) + \
+       (IS_AGE_POOL(pool) ? AGE_SIZE : 0)))
 #define MLX5_GET_POOL_CNT_EXT(pool, offset) \
-       MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset)))
+       MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset)))
+#define MLX5_CNT_TO_AGE(cnt) \
+       ((struct mlx5_age_param *)((cnt) + 1))
 
 struct mlx5_flow_counter_pool;
 
+/*age status*/
+enum {
+       AGE_FREE, /* Initialized state. */
+       AGE_CANDIDATE, /* Counter assigned to flows. */
+       AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and destroy. */
+};
+
+/* Counter age parameter. */
+struct mlx5_age_param {
+       rte_atomic16_t state; /**< Age state. */
+       uint16_t port_id; /**< Port id of the counter. */
+       uint32_t timeout:15; /**< Age timeout in unit of 0.1sec. */
+       uint32_t expire:16; /**< Expire time(0.1sec) in the future. */
+       void *context; /**< Flow counter age context. */
+};
+
 struct flow_counter_stats {
        uint64_t hits;
        uint64_t bytes;
@@ -299,7 +328,7 @@ struct mlx5_flow_counter_pool {
        rte_atomic64_t start_query_gen; /* Query start round. */
        rte_atomic64_t end_query_gen; /* Query end round. */
        uint32_t index; /* Pool index in container. */
-       uint32_t type: 2; /* Memory type behind the counter array. */
+       uint8_t type; /* Memory type behind the counter array. */
        rte_spinlock_t sl; /* The pool lock. */
        struct mlx5_counter_stats_raw *raw;
        struct mlx5_counter_stats_raw *raw_hw; /* The raw on HW working. */
@@ -337,18 +366,33 @@ struct mlx5_pools_container {
 
 /* Counter global management structure. */
 struct mlx5_flow_counter_mng {
-       uint8_t mhi[2]; /* master \ host container index. */
-       struct mlx5_pools_container ccont[2 * 2];
-       /* 2 containers for single and for batch for double-buffer. */
+       uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
+       struct mlx5_pools_container ccont[2 * 2][2];
+       /* master \ host and age \ no age pools container. */
        struct mlx5_counters flow_counters; /* Legacy flow counter list. */
        uint8_t pending_queries;
        uint8_t batch;
        uint16_t pool_index;
+       uint8_t age;
        uint8_t query_thread_on;
        LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
        LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
 };
-
+#define MLX5_AGE_EVENT_NEW             1
+#define MLX5_AGE_TRIGGER               2
+#define MLX5_AGE_SET(age_info, BIT) \
+       ((age_info)->flags |= (1 << (BIT)))
+#define MLX5_AGE_GET(age_info, BIT) \
+       ((age_info)->flags & (1 << (BIT)))
+#define GET_PORT_AGE_INFO(priv) \
+       (&((priv)->sh->port[(priv)->ibv_port - 1].age_info))
+
+/* Aging information for per port. */
+struct mlx5_age_info {
+       uint8_t flags; /*Indicate if is new event or need be trigered*/
+       struct mlx5_counters aged_counters; /* Aged flow counter list. */
+       rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
+};
 /* Per port data of shared IB device. */
 struct mlx5_ibv_shared_port {
        uint32_t ih_port_id;
@@ -360,6 +404,8 @@ struct mlx5_ibv_shared_port {
         * RTE_MAX_ETHPORTS it means there is no subhandler
         * installed for specified IB port index.
         */
+       struct mlx5_age_info age_info;
+       /* Aging information for per port. */
 };
 
 /* Table key of the hash organization. */
@@ -765,6 +811,8 @@ int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
 int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file,
                       struct rte_flow_error *error);
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+                       uint32_t nb_contexts, struct rte_flow_error *error);
 
 /* mlx5_mp.c */
 int mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer);
index cb593c9..a373f33 100644 (file)
@@ -24,6 +24,7 @@
 #include <rte_ether.h>
 #include <rte_ethdev_driver.h>
 #include <rte_flow.h>
+#include <rte_cycles.h>
 #include <rte_flow_driver.h>
 #include <rte_malloc.h>
 #include <rte_ip.h>
@@ -242,6 +243,7 @@ static const struct rte_flow_ops mlx5_flow_ops = {
        .isolate = mlx5_flow_isolate,
        .query = mlx5_flow_query,
        .dev_dump = mlx5_flow_dev_dump,
+       .get_aged_flows = mlx5_flow_get_aged_flows,
 };
 
 /* Convert FDIR request to Generic flow. */
@@ -2531,6 +2533,8 @@ flow_drv_validate(struct rte_eth_dev *dev,
  *   Pointer to the list of items.
  * @param[in] actions
  *   Pointer to the list of actions.
+ * @param[in] flow_idx
+ *   This memory pool index to the flow.
  * @param[out] error
  *   Pointer to the error structure.
  *
@@ -2543,14 +2547,19 @@ flow_drv_prepare(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
+                uint32_t flow_idx,
                 struct rte_flow_error *error)
 {
        const struct mlx5_flow_driver_ops *fops;
        enum mlx5_flow_drv_type type = flow->drv_type;
+       struct mlx5_flow *mlx5_flow = NULL;
 
        MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
        fops = flow_get_drv_ops(type);
-       return fops->prepare(dev, attr, items, actions, error);
+       mlx5_flow = fops->prepare(dev, attr, items, actions, error);
+       if (mlx5_flow)
+               mlx5_flow->flow_idx = flow_idx;
+       return mlx5_flow;
 }
 
 /**
@@ -3498,6 +3507,8 @@ flow_hairpin_split(struct rte_eth_dev *dev,
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   This memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -3511,11 +3522,13 @@ flow_create_split_inner(struct rte_eth_dev *dev,
                        const struct rte_flow_attr *attr,
                        const struct rte_flow_item items[],
                        const struct rte_flow_action actions[],
-                       bool external, struct rte_flow_error *error)
+                       bool external, uint32_t flow_idx,
+                       struct rte_flow_error *error)
 {
        struct mlx5_flow *dev_flow;
 
-       dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
+       dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
+               flow_idx, error);
        if (!dev_flow)
                return -rte_errno;
        dev_flow->flow = flow;
@@ -3876,6 +3889,8 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   This memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -3888,7 +3903,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
                           const struct rte_flow_attr *attr,
                           const struct rte_flow_item items[],
                           const struct rte_flow_action actions[],
-                          bool external, struct rte_flow_error *error)
+                          bool external, uint32_t flow_idx,
+                          struct rte_flow_error *error)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
@@ -3908,7 +3924,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
            !mlx5_flow_ext_mreg_supported(dev))
                return flow_create_split_inner(dev, flow, NULL, prefix_layers,
                                               attr, items, actions, external,
-                                              error);
+                                              flow_idx, error);
        actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
                                                           &encap_idx);
        if (qrss) {
@@ -3992,7 +4008,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
        /* Add the unmodified original or prefix subflow. */
        ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
                                      items, ext_actions ? ext_actions :
-                                     actions, external, error);
+                                     actions, external, flow_idx, error);
        if (ret < 0)
                goto exit;
        MLX5_ASSERT(dev_flow);
@@ -4055,7 +4071,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
                ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
                                              &q_attr, mtr_sfx ? items :
                                              q_items, q_actions,
-                                             external, error);
+                                             external, flow_idx, error);
                if (ret < 0)
                        goto exit;
                /* qrss ID should be freed if failed. */
@@ -4096,6 +4112,8 @@ exit:
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   This memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -4107,7 +4125,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
                           const struct rte_flow_attr *attr,
                           const struct rte_flow_item items[],
                           const struct rte_flow_action actions[],
-                          bool external, struct rte_flow_error *error)
+                          bool external, uint32_t flow_idx,
+                          struct rte_flow_error *error)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct rte_flow_action *sfx_actions = NULL;
@@ -4151,7 +4170,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
                /* Add the prefix subflow. */
                ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
                                              items, pre_actions, external,
-                                             error);
+                                             flow_idx, error);
                if (ret) {
                        ret = -rte_errno;
                        goto exit;
@@ -4168,7 +4187,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
                                         0, &sfx_attr,
                                         sfx_items ? sfx_items : items,
                                         sfx_actions ? sfx_actions : actions,
-                                        external, error);
+                                        external, flow_idx, error);
 exit:
        if (sfx_actions)
                rte_free(sfx_actions);
@@ -4205,6 +4224,8 @@ exit:
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   This memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -4216,12 +4237,13 @@ flow_create_split_outer(struct rte_eth_dev *dev,
                        const struct rte_flow_attr *attr,
                        const struct rte_flow_item items[],
                        const struct rte_flow_action actions[],
-                       bool external, struct rte_flow_error *error)
+                       bool external, uint32_t flow_idx,
+                       struct rte_flow_error *error)
 {
        int ret;
 
        ret = flow_create_split_meter(dev, flow, attr, items,
-                                        actions, external, error);
+                                        actions, external, flow_idx, error);
        MLX5_ASSERT(ret <= 0);
        return ret;
 }
@@ -4356,7 +4378,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
                 */
                ret = flow_create_split_outer(dev, flow, attr,
                                              buf->entry[i].pattern,
-                                             p_actions_rx, external,
+                                             p_actions_rx, external, idx,
                                              error);
                if (ret < 0)
                        goto error;
@@ -4367,7 +4389,8 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
                attr_tx.ingress = 0;
                attr_tx.egress = 1;
                dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
-                                           actions_hairpin_tx.actions, error);
+                                        actions_hairpin_tx.actions,
+                                        idx, error);
                if (!dev_flow)
                        goto error;
                dev_flow->flow = flow;
@@ -5747,6 +5770,31 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
 
 #define MLX5_POOL_QUERY_FREQ_US 1000000
 
+/**
+ * Get number of all validate pools.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_ibv_shared object.
+ *
+ * @return
+ *   The number of all validate pools.
+ */
+static uint32_t
+mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
+{
+       uint8_t age, i;
+       uint32_t pools_n = 0;
+       struct mlx5_pools_container *cont;
+
+       for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+               for (i = 0; i < 2 ; ++i) {
+                       cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
+                       pools_n += rte_atomic16_read(&cont->n_valid);
+               }
+       }
+       return pools_n;
+}
+
 /**
  * Set the periodic procedure for triggering asynchronous batch queries for all
  * the counter pools.
@@ -5757,12 +5805,9 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
 void
 mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
 {
-       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
-       uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
-       uint32_t us;
+       uint32_t pools_n, us;
 
-       cont = MLX5_CNT_CONTAINER(sh, 1, 0);
-       pools_n += rte_atomic16_read(&cont->n_valid);
+       pools_n = mlx5_get_all_valid_pool_count(sh);
        us = MLX5_POOL_QUERY_FREQ_US / pools_n;
        DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
        if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
@@ -5788,6 +5833,7 @@ mlx5_flow_query_alarm(void *arg)
        uint16_t offset;
        int ret;
        uint8_t batch = sh->cmng.batch;
+       uint8_t age = sh->cmng.age;
        uint16_t pool_index = sh->cmng.pool_index;
        struct mlx5_pools_container *cont;
        struct mlx5_pools_container *mcont;
@@ -5796,8 +5842,8 @@ mlx5_flow_query_alarm(void *arg)
        if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
                goto set_alarm;
 next_container:
-       cont = MLX5_CNT_CONTAINER(sh, batch, 1);
-       mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
+       cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
+       mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
        /* Check if resize was done and need to flip a container. */
        if (cont != mcont) {
                if (cont->pools) {
@@ -5807,15 +5853,22 @@ next_container:
                }
                rte_cio_wmb();
                 /* Flip the host container. */
-               sh->cmng.mhi[batch] ^= (uint8_t)2;
+               sh->cmng.mhi[batch][age] ^= (uint8_t)2;
                cont = mcont;
        }
        if (!cont->pools) {
                /* 2 empty containers case is unexpected. */
-               if (unlikely(batch != sh->cmng.batch))
+               if (unlikely(batch != sh->cmng.batch) &&
+                       unlikely(age != sh->cmng.age)) {
                        goto set_alarm;
+               }
                batch ^= 0x1;
                pool_index = 0;
+               if (batch == 0 && pool_index == 0) {
+                       age ^= 0x1;
+                       sh->cmng.batch = batch;
+                       sh->cmng.age = age;
+               }
                goto next_container;
        }
        pool = cont->pools[pool_index];
@@ -5858,13 +5911,80 @@ next_container:
        if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
                batch ^= 0x1;
                pool_index = 0;
+               if (batch == 0 && pool_index == 0)
+                       age ^= 0x1;
        }
 set_alarm:
        sh->cmng.batch = batch;
        sh->cmng.pool_index = pool_index;
+       sh->cmng.age = age;
        mlx5_set_query_alarm(sh);
 }
 
+/**
+ * Check and callback event for new aged flow in the counter pool
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_ibv_shared object.
+ * @param[in] pool
+ *   Pointer to Current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_ibv_shared *sh,
+                  struct mlx5_flow_counter_pool *pool)
+{
+       struct mlx5_priv *priv;
+       struct mlx5_flow_counter *cnt;
+       struct mlx5_age_info *age_info;
+       struct mlx5_age_param *age_param;
+       struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+       struct mlx5_counter_stats_raw *prev = pool->raw;
+       uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
+       uint32_t i;
+
+       for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+               cnt = MLX5_POOL_GET_CNT(pool, i);
+               age_param = MLX5_CNT_TO_AGE(cnt);
+               if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
+                       continue;
+               if (cur->data[i].hits != prev->data[i].hits) {
+                       age_param->expire = curr + age_param->timeout;
+                       continue;
+               }
+               if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
+                       continue;
+               /**
+                * Hold the lock first, or if between the
+                * state AGE_TMOUT and tailq operation the
+                * release happened, the release procedure
+                * may delete a non-existent tailq node.
+                */
+               priv = rte_eth_devices[age_param->port_id].data->dev_private;
+               age_info = GET_PORT_AGE_INFO(priv);
+               rte_spinlock_lock(&age_info->aged_sl);
+               /* If the cpmset fails, release happens. */
+               if (rte_atomic16_cmpset((volatile uint16_t *)
+                                       &age_param->state,
+                                       AGE_CANDIDATE,
+                                       AGE_TMOUT) ==
+                                       AGE_CANDIDATE) {
+                       TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
+                       MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
+               }
+               rte_spinlock_unlock(&age_info->aged_sl);
+       }
+       for (i = 0; i < sh->max_port; i++) {
+               age_info = &sh->port[i].age_info;
+               if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
+                       continue;
+               if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
+                       _rte_eth_dev_callback_process
+                               (&rte_eth_devices[sh->port[i].devx_ih_port_id],
+                               RTE_ETH_EVENT_FLOW_AGED, NULL);
+               age_info->flags = 0;
+       }
+}
+
 /**
  * Handler for the HW respond about ready values from an asynchronous batch
  * query. This function is probably called by the host thread.
@@ -5889,6 +6009,8 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
                raw_to_free = pool->raw_hw;
        } else {
                raw_to_free = pool->raw;
+               if (IS_AGE_POOL(pool))
+                       mlx5_flow_aging_check(sh, pool);
                rte_spinlock_lock(&pool->sl);
                pool->raw = pool->raw_hw;
                rte_spinlock_unlock(&pool->sl);
@@ -6040,3 +6162,40 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev,
        return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
                                       sh->tx_domain, file);
 }
+
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] context
+ *   The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_countexts
+ *   The length of context array pointers.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   how many contexts get in success, otherwise negative errno value.
+ *   if nb_contexts is 0, return the amount of all aged contexts.
+ *   if nb_contexts is not 0 , return the amount of aged flows reported
+ *   in the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+                       uint32_t nb_contexts, struct rte_flow_error *error)
+{
+       const struct mlx5_flow_driver_ops *fops;
+       struct rte_flow_attr attr = { .transfer = 0 };
+
+       if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+               fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+               return fops->get_aged_flows(dev, contexts, nb_contexts,
+                                                   error);
+       }
+       DRV_LOG(ERR,
+               "port %u get aged flows is not supported.",
+                dev->data->port_id);
+       return -ENOTSUP;
+}
index 75b8288..7f5e01f 100644 (file)
@@ -199,6 +199,7 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_METER (1ull << 31)
 #define MLX5_FLOW_ACTION_SET_IPV4_DSCP (1ull << 32)
 #define MLX5_FLOW_ACTION_SET_IPV6_DSCP (1ull << 33)
+#define MLX5_FLOW_ACTION_AGE (1ull << 34)
 
 #define MLX5_FLOW_FATE_ACTIONS \
        (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -652,6 +653,7 @@ struct mlx5_flow_verbs_workspace {
 /** Device flow structure. */
 struct mlx5_flow {
        struct rte_flow *flow; /**< Pointer to the main flow. */
+       uint32_t flow_idx; /**< The memory pool index to the main flow. */
        uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
        uint64_t act_flags;
        /**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
@@ -875,6 +877,11 @@ typedef int (*mlx5_flow_counter_query_t)(struct rte_eth_dev *dev,
                                         uint32_t cnt,
                                         bool clear, uint64_t *pkts,
                                         uint64_t *bytes);
+typedef int (*mlx5_flow_get_aged_flows_t)
+                                       (struct rte_eth_dev *dev,
+                                        void **context,
+                                        uint32_t nb_contexts,
+                                        struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
        mlx5_flow_validate_t validate;
        mlx5_flow_prepare_t prepare;
@@ -890,13 +897,14 @@ struct mlx5_flow_driver_ops {
        mlx5_flow_counter_alloc_t counter_alloc;
        mlx5_flow_counter_free_t counter_free;
        mlx5_flow_counter_query_t counter_query;
+       mlx5_flow_get_aged_flows_t get_aged_flows;
 };
 
 
-#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \
-       [(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
-#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \
-       [(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
+#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
+       [(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
+#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
+       [(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
 
 /* mlx5_flow.c */
 
index 99df3c5..c8035b3 100644 (file)
@@ -24,6 +24,7 @@
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 #include <rte_malloc.h>
+#include <rte_cycles.h>
 #include <rte_ip.h>
 #include <rte_gre.h>
 #include <rte_vxlan.h>
@@ -3716,6 +3717,50 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev,
        return 0;
 }
 
+/**
+ * Validate the age action.
+ *
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] action
+ *   Pointer to the age action.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_age(uint64_t action_flags,
+                           const struct rte_flow_action *action,
+                           struct rte_eth_dev *dev,
+                           struct rte_flow_error *error)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       const struct rte_flow_action_age *age = action->conf;
+
+       if (!priv->config.devx || priv->counter_fallback)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "age action not supported");
+       if (!(action->conf))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "configuration cannot be null");
+       if (age->timeout >= UINT16_MAX / 2 / 10)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "Max age time: 3275 seconds");
+       if (action_flags & MLX5_FLOW_ACTION_AGE)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "Duplicate age actions set");
+       return 0;
+}
+
 /**
  * Validate the modify-header IPv4 DSCP actions.
  *
@@ -3893,14 +3938,16 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_pools_container *cont;
        struct mlx5_flow_counter_pool *pool;
-       uint32_t batch = 0;
+       uint32_t batch = 0, age = 0;
 
        idx--;
+       age = MLX_CNT_IS_AGE(idx);
+       idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx;
        if (idx >= MLX5_CNT_BATCH_OFFSET) {
                idx -= MLX5_CNT_BATCH_OFFSET;
                batch = 1;
        }
-       cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+       cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
        MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
        pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
        MLX5_ASSERT(pool);
@@ -4020,18 +4067,21 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
  *   Pointer to the Ethernet device structure.
  * @param[in] batch
  *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the pool is for Aging counter.
  *
  * @return
  *   The new container pointer on success, otherwise NULL and rte_errno is set.
  */
 static struct mlx5_pools_container *
-flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev,
+                               uint32_t batch, uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_pools_container *cont =
-                       MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+                       MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
        struct mlx5_pools_container *new_cont =
-                       MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
+                       MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
        struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
        uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
        uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
@@ -4039,7 +4089,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
 
        /* Fallback mode has no background thread. Skip the check. */
        if (!priv->counter_fallback &&
-           cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+           cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
                /* The last resize still hasn't detected by the host thread. */
                rte_errno = EAGAIN;
                return NULL;
@@ -4082,7 +4132,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
        new_cont->init_mem_mng = mem_mng;
        rte_cio_wmb();
         /* Flip the master container. */
-       priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+       priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
        return new_cont;
 }
 
@@ -4114,7 +4164,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
        cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
        MLX5_ASSERT(pool);
        if (counter < MLX5_CNT_BATCH_OFFSET) {
-               cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+               cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
                if (priv->counter_fallback)
                        return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
                                        0, pkts, bytes, 0, NULL, NULL, 0);
@@ -4147,6 +4197,8 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  *   The devX counter handle.
  * @param[in] batch
  *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the pool is for counter that was allocated for aging.
  * @param[in/out] cont_cur
  *   Pointer to the container pointer, it will be update in pool resize.
  *
@@ -4155,24 +4207,23 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  */
 static struct mlx5_pools_container *
 flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
-                   uint32_t batch)
+                   uint32_t batch, uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter_pool *pool;
        struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
-                                                              0);
+                                                              0, age);
        int16_t n_valid = rte_atomic16_read(&cont->n_valid);
-       uint32_t size;
+       uint32_t size = sizeof(*pool);
 
        if (cont->n == n_valid) {
-               cont = flow_dv_container_resize(dev, batch);
+               cont = flow_dv_container_resize(dev, batch, age);
                if (!cont)
                        return NULL;
        }
-       size = sizeof(*pool);
        size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
-       if (!batch)
-               size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE;
+       size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
+       size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
        pool = rte_calloc(__func__, 1, size, 0);
        if (!pool) {
                rte_errno = ENOMEM;
@@ -4184,8 +4235,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
                                                     MLX5_CNT_CONTAINER_RESIZE;
        pool->raw_hw = NULL;
        pool->type = 0;
-       if (!batch)
-               pool->type |= CNT_POOL_TYPE_EXT;
+       pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
+       pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
        rte_spinlock_init(&pool->sl);
        /*
         * The generation of the new allocated counters in this pool is 0, 2 in
@@ -4212,6 +4263,39 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
        return cont;
 }
 
+/**
+ * Sync the minimum dcs-id between the aged and no-aged counter pools
+ * that share the same dcs-id, keeping the smaller value in both.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] pool
+ *   Current counter pool.
+ * @param[in] batch
+ *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the counter is for aging.
+ */
+static void
+flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
+                       struct mlx5_flow_counter_pool *pool,
+                       uint32_t batch, uint32_t age)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_flow_counter_pool *other;
+       struct mlx5_pools_container *cont;
+
+       cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
+       other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
+       if (!other)
+               return;
+       if (pool->min_dcs->id < other->min_dcs->id) {
+               rte_atomic64_set(&other->a64_dcs,
+                       rte_atomic64_read(&pool->a64_dcs));
+       } else {
+               rte_atomic64_set(&pool->a64_dcs,
+                       rte_atomic64_read(&other->a64_dcs));
+       }
+}
 /**
  * Prepare a new counter and/or a new counter pool.
  *
@@ -4221,6 +4305,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
  *   Where to put the pointer of a new counter.
  * @param[in] batch
  *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the pool is for counter that was allocated for aging.
  *
  * @return
  *   The counter container pointer and @p cnt_free is set on success,
@@ -4229,7 +4315,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 static struct mlx5_pools_container *
 flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
                             struct mlx5_flow_counter **cnt_free,
-                            uint32_t batch)
+                            uint32_t batch, uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_pools_container *cont;
@@ -4238,7 +4324,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
        struct mlx5_flow_counter *cnt;
        uint32_t i;
 
-       cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+       cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
        if (!batch) {
                /* bulk_bitmap must be 0 for single counter allocation. */
                dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
@@ -4246,7 +4332,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
                        return NULL;
                pool = flow_dv_find_pool_by_id(cont, dcs->id);
                if (!pool) {
-                       cont = flow_dv_pool_create(dev, dcs, batch);
+                       cont = flow_dv_pool_create(dev, dcs, batch, age);
                        if (!cont) {
                                mlx5_devx_cmd_destroy(dcs);
                                return NULL;
@@ -4256,6 +4342,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
                        rte_atomic64_set(&pool->a64_dcs,
                                         (int64_t)(uintptr_t)dcs);
                }
+               flow_dv_counter_update_min_dcs(dev,
+                                               pool, batch, age);
                i = dcs->id % MLX5_COUNTERS_PER_POOL;
                cnt = MLX5_POOL_GET_CNT(pool, i);
                TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
@@ -4270,7 +4358,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
                rte_errno = ENODATA;
                return NULL;
        }
-       cont = flow_dv_pool_create(dev, dcs, batch);
+       cont = flow_dv_pool_create(dev, dcs, batch, age);
        if (!cont) {
                mlx5_devx_cmd_destroy(dcs);
                return NULL;
@@ -4331,13 +4419,15 @@ flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id,
  *   Counter identifier.
  * @param[in] group
  *   Counter flow group.
+ * @param[in] age
+ *   Whether the counter was allocated for aging.
  *
  * @return
  *   Index to flow counter on success, 0 otherwise and rte_errno is set.
  */
 static uint32_t
 flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
-                     uint16_t group)
+                     uint16_t group, uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter_pool *pool = NULL;
@@ -4353,7 +4443,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
         */
        uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
        struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
-                                                              0);
+                                                              0, age);
        uint32_t cnt_idx;
 
        if (!priv->config.devx) {
@@ -4392,13 +4482,13 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
                cnt_free = NULL;
        }
        if (!cnt_free) {
-               cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
+               cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
                if (!cont)
                        return 0;
                pool = TAILQ_FIRST(&cont->pool_list);
        }
        if (!batch)
-               cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free);
+               cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
        /* Create a DV counter action only in the first time usage. */
        if (!cnt_free->action) {
                uint16_t offset;
@@ -4421,6 +4511,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
        cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
                                MLX5_CNT_ARRAY_IDX(pool, cnt_free));
        cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
+       cnt_idx += age * MLX5_CNT_AGE_OFFSET;
        /* Update the counter reset values. */
        if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
                                 &cnt_free->bytes))
@@ -4442,6 +4533,64 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
        return cnt_idx;
 }
 
+/**
+ * Get the aging parameter attached to a counter, by counter index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] counter
+ *   Index to the counter handler.
+ *
+ * @return
+ *   Pointer to the aging parameter stored with the counter index.
+ */
+static struct mlx5_age_param*
+flow_dv_counter_idx_get_age(struct rte_eth_dev *dev,
+                               uint32_t counter)
+{
+       struct mlx5_flow_counter *cnt;
+       struct mlx5_flow_counter_pool *pool = NULL;
+
+       flow_dv_counter_get_by_idx(dev, counter, &pool);
+       counter = (counter - 1) % MLX5_COUNTERS_PER_POOL;
+       cnt = MLX5_POOL_GET_CNT(pool, counter);
+       return MLX5_CNT_TO_AGE(cnt);
+}
+
+/**
+ * Remove a flow counter from aged counter list.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] counter
+ *   Index to the counter handler.
+ * @param[in] cnt
+ *   Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_remove_from_age(struct rte_eth_dev *dev,
+                               uint32_t counter, struct mlx5_flow_counter *cnt)
+{
+       struct mlx5_age_info *age_info;
+       struct mlx5_age_param *age_param;
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       age_info = GET_PORT_AGE_INFO(priv);
+       age_param = flow_dv_counter_idx_get_age(dev, counter);
+       if (rte_atomic16_cmpset((volatile uint16_t *)
+                       &age_param->state,
+                       AGE_CANDIDATE, AGE_FREE)
+                       != AGE_CANDIDATE) {
+               /*
+                * We need the lock even if it is the age timeout case,
+                * since the counter may still be in process.
+                */
+               rte_spinlock_lock(&age_info->aged_sl);
+               TAILQ_REMOVE(&age_info->aged_counters, cnt, next);
+               rte_spinlock_unlock(&age_info->aged_sl);
+       }
+       rte_atomic16_set(&age_param->state, AGE_FREE);
+}
 /**
  * Release a flow counter.
  *
@@ -4462,10 +4611,12 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
        cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
        MLX5_ASSERT(pool);
        if (counter < MLX5_CNT_BATCH_OFFSET) {
-               cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+               cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
                if (cnt_ext && --cnt_ext->ref_cnt)
                        return;
        }
+       if (IS_AGE_POOL(pool))
+               flow_dv_counter_remove_from_age(dev, counter, cnt);
        /* Put the counter in the end - the last updated one. */
        TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
        /*
@@ -5240,6 +5391,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
                        /* Meter action will add one more TAG action. */
                        rw_act_num += MLX5_ACT_NUM_SET_TAG;
                        break;
+               case RTE_FLOW_ACTION_TYPE_AGE:
+                       ret = flow_dv_validate_action_age(action_flags,
+                                                         actions, dev,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_AGE;
+                       ++actions_n;
+                       break;
                case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
                        ret = flow_dv_validate_action_modify_ipv4_dscp
                                                         (action_flags,
@@ -7278,6 +7438,53 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev,
        return 0;
 }
 
+/**
+ * Create a counter with aging configuration.
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] dev_flow
+ *   Pointer to the device flow, supplies the group and flow index.
+ * @param[in] count
+ *   Pointer to the counter action configuration.
+ * @param[in] age
+ *   Pointer to the aging action configuration.
+ *
+ * @return
+ *   Index to flow counter on success, 0 otherwise.
+ */
+static uint32_t
+flow_dv_translate_create_counter(struct rte_eth_dev *dev,
+                               struct mlx5_flow *dev_flow,
+                               const struct rte_flow_action_count *count,
+                               const struct rte_flow_action_age *age)
+{
+       uint32_t counter;
+       struct mlx5_age_param *age_param;
+
+       counter = flow_dv_counter_alloc(dev,
+                               count ? count->shared : 0,
+                               count ? count->id : 0,
+                               dev_flow->dv.group, !!age);
+       if (!counter || age == NULL)
+               return counter;
+       age_param  = flow_dv_counter_idx_get_age(dev, counter);
+       /*
+        * With no application context, default to the flow index.
+        */
+       age_param->context = age->context ? age->context :
+               (void *)(uintptr_t)(dev_flow->flow_idx);
+       /*
+        * The counter age accuracy may have a bit delay. Have 3/4
+        * second bias on the timeout in order to let it age in time.
+        */
+       age_param->timeout = age->timeout * 10 - MLX5_AGING_TIME_DELAY;
+       age_param->port_id = dev->data->port_id;
+       /* Set expire time in unit of 0.1 sec. */
+       age_param->expire = age_param->timeout +
+                       rte_rdtsc() / (rte_get_tsc_hz() / 10);
+       rte_atomic16_set(&age_param->state, AGE_CANDIDATE);
+       return counter;
+}
 /**
  * Add Tx queue matcher
  *
@@ -7447,6 +7654,8 @@ __flow_dv_translate(struct rte_eth_dev *dev,
                            (MLX5_MAX_MODIFY_NUM + 1)];
        } mhdr_dummy;
        struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res;
+       const struct rte_flow_action_count *count = NULL;
+       const struct rte_flow_action_age *age = NULL;
        union flow_dv_attr flow_attr = { .attr = 0 };
        uint32_t tag_be;
        union mlx5_flow_tbl_key tbl_key;
@@ -7475,7 +7684,6 @@ __flow_dv_translate(struct rte_eth_dev *dev,
                const struct rte_flow_action_queue *queue;
                const struct rte_flow_action_rss *rss;
                const struct rte_flow_action *action = actions;
-               const struct rte_flow_action_count *count = action->conf;
                const uint8_t *rss_key;
                const struct rte_flow_action_jump *jump_data;
                const struct rte_flow_action_meter *mtr;
@@ -7604,36 +7812,21 @@ __flow_dv_translate(struct rte_eth_dev *dev,
                        action_flags |= MLX5_FLOW_ACTION_RSS;
                        dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
                        break;
+               case RTE_FLOW_ACTION_TYPE_AGE:
                case RTE_FLOW_ACTION_TYPE_COUNT:
                        if (!dev_conf->devx) {
-                               rte_errno = ENOTSUP;
-                               goto cnt_err;
-                       }
-                       flow->counter = flow_dv_counter_alloc(dev,
-                                                       count->shared,
-                                                       count->id,
-                                                       dev_flow->dv.group);
-                       if (!flow->counter)
-                               goto cnt_err;
-                       dev_flow->dv.actions[actions_n++] =
-                                 (flow_dv_counter_get_by_idx(dev,
-                                 flow->counter, NULL))->action;
-                       action_flags |= MLX5_FLOW_ACTION_COUNT;
-                       break;
-cnt_err:
-                       if (rte_errno == ENOTSUP)
                                return rte_flow_error_set
                                              (error, ENOTSUP,
                                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                               NULL,
                                               "count action not supported");
+                       }
+                       /* Save information first, will apply later. */
+                       if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT)
+                               count = action->conf;
                        else
-                               return rte_flow_error_set
-                                               (error, rte_errno,
-                                                RTE_FLOW_ERROR_TYPE_ACTION,
-                                                action,
-                                                "cannot create counter"
-                                                 " object.");
+                               age = action->conf;
+                       action_flags |= MLX5_FLOW_ACTION_COUNT;
                        break;
                case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
                        dev_flow->dv.actions[actions_n++] =
@@ -7907,6 +8100,22 @@ cnt_err:
                                dev_flow->dv.actions[modify_action_position] =
                                        handle->dvh.modify_hdr->verbs_action;
                        }
+                       if (action_flags & MLX5_FLOW_ACTION_COUNT) {
+                               flow->counter =
+                                       flow_dv_translate_create_counter(dev,
+                                               dev_flow, count, age);
+
+                               if (!flow->counter)
+                                       return rte_flow_error_set
+                                               (error, rte_errno,
+                                               RTE_FLOW_ERROR_TYPE_ACTION,
+                                               NULL,
+                                               "cannot create counter"
+                                               " object.");
+                               dev_flow->dv.actions[actions_n++] =
+                                         (flow_dv_counter_get_by_idx(dev,
+                                         flow->counter, NULL))->action;
+                       }
                        break;
                default:
                        break;
@@ -9168,6 +9377,60 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear,
        return 0;
 }
 
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] context
+ *   The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ *   The length of context array pointers.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   the number of aged contexts reported, otherwise negative errno value.
+ *   if nb_contexts is 0, return the amount of all aged contexts.
+ *   if nb_contexts is not 0 , return the amount of aged flows reported
+ *   in the context array.
+ * @see rte_flow_get_aged_flows
+ */
+static int
+flow_get_aged_flows(struct rte_eth_dev *dev,
+                   void **context,
+                   uint32_t nb_contexts,
+                   struct rte_flow_error *error)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_age_info *age_info;
+       struct mlx5_age_param *age_param;
+       struct mlx5_flow_counter *counter;
+       int nb_flows = 0;
+
+       if (nb_contexts && !context)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "Should assign at least one flow or"
+                                         " context to get if nb_contexts != 0");
+       age_info = GET_PORT_AGE_INFO(priv);
+       rte_spinlock_lock(&age_info->aged_sl);
+       TAILQ_FOREACH(counter, &age_info->aged_counters, next) {
+               nb_flows++;
+               if (nb_contexts) {
+                       age_param = MLX5_CNT_TO_AGE(counter);
+                       context[nb_flows - 1] = age_param->context;
+                       if (!(--nb_contexts))
+                               break;
+               }
+       }
+       rte_spinlock_unlock(&age_info->aged_sl);
+       MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
+       return nb_flows;
+}
+
 /*
  * Mutex-protected thunk to lock-free  __flow_dv_translate().
  */
@@ -9234,7 +9497,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev)
        uint32_t cnt;
 
        flow_dv_shared_lock(dev);
-       cnt = flow_dv_counter_alloc(dev, 0, 0, 1);
+       cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
        flow_dv_shared_unlock(dev);
        return cnt;
 }
@@ -9265,6 +9528,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
        .counter_alloc = flow_dv_counter_allocate,
        .counter_free = flow_dv_counter_free,
        .counter_query = flow_dv_counter_query,
+       .get_aged_flows = flow_get_aged_flows,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
index 236d665..7efd97f 100644 (file)
@@ -56,7 +56,8 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
                              struct mlx5_flow_counter_pool **ppool)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
+       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
+                                                                       0);
        struct mlx5_flow_counter_pool *pool;
 
        idx--;
@@ -151,7 +152,8 @@ static uint32_t
 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
+       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
+                                                                       0);
        struct mlx5_flow_counter_pool *pool = NULL;
        struct mlx5_flow_counter_ext *cnt_ext = NULL;
        struct mlx5_flow_counter *cnt = NULL;
@@ -251,7 +253,7 @@ flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
 
        cnt = flow_verbs_counter_get_by_idx(dev, counter,
                                            &pool);
-       cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+       cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
        if (--cnt_ext->ref_cnt == 0) {
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
                claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
@@ -282,7 +284,7 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
                struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
                                                (dev, flow->counter, &pool);
                struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
-                                               (cnt);
+                                               (pool, cnt);
                struct rte_flow_query_count *qc = data;
                uint64_t counters[2] = {0, 0};
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
@@ -1083,12 +1085,12 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
        }
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
        cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
-       cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+       cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
        counter.counter_set_handle = cnt_ext->cs->handle;
        flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
        cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
-       cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+       cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
        counter.counters = cnt_ext->cs;
        flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
 #endif