* Added support for match on ICMP/ICMP6 code and type.
* Added support for matching on GRE's key and C,K,S present bits.
* Added support for IP-in-IP tunnel.
+ * Accelerated creation and destruction of flows using the count action.
+ * Accelerated flow counter queries.
* **Updated Solarflare network PMD.**
infiniband/mlx5dv.h \
enum MLX5DV_FLOW_ACTION_COUNTERS_DEVX \
$(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_IBV_DEVX_ASYNC \
+ infiniband/mlx5dv.h \
+ func mlx5dv_devx_obj_query_async \
+ $(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
HAVE_ETHTOOL_LINK_MODE_25G \
/usr/include/linux/ethtool.h \
'mlx5dv_devx_obj_create' ],
[ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h',
'MLX5DV_FLOW_ACTION_COUNTERS_DEVX' ],
+ [ 'HAVE_IBV_DEVX_ASYNC', 'infiniband/mlx5dv.h',
+ 'mlx5dv_devx_obj_query_async' ],
[ 'HAVE_MLX5DV_DR', 'infiniband/mlx5dv.h',
'MLX5DV_DR_DOMAIN_TYPE_NIC_RX' ],
[ 'HAVE_MLX5DV_DR_ESWITCH', 'infiniband/mlx5dv.h',
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
+#include <rte_alarm.h>
#include "mlx5.h"
#include "mlx5_utils.h"
struct mlx5_counter_stats_mem_mng *mng;
uint8_t i;
int j;
+ int retries = 1024;
+ rte_errno = 0;
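+ /*
+ * Cancel the periodic counter query alarm; retry while the alarm
+ * callback is still executing (rte_eal_alarm_cancel reports this by
+ * setting rte_errno to EINPROGRESS).
+ */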
+ while (--retries) {
+ rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
+ if (rte_errno != EINPROGRESS)
+ break;
+ rte_pause();
+ }
for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
struct mlx5_flow_counter_pool *pool;
uint32_t batch = !!(i % 2);
};
#define MLX5_COUNTERS_PER_POOL 512
+#define MLX5_MAX_PENDING_QUERIES 4
struct mlx5_flow_counter_pool;
struct mlx5_devx_obj *dcs; /**< Counter Devx object. */
struct mlx5_flow_counter_pool *pool; /**< The counter pool. */
};
- uint64_t hits; /**< Reset value of hits packets. */
+ union {
+ uint64_t hits; /**< Reset value of hits packets. */
+ int64_t query_gen; /**< Generation of the last release. */
+ };
uint64_t bytes; /**< Reset value of bytes. */
void *action; /**< Pointer to the dv action. */
};
struct mlx5_flow_counter_pool {
TAILQ_ENTRY(mlx5_flow_counter_pool) next;
struct mlx5_counters counters; /* Free counter list. */
- struct mlx5_devx_obj *min_dcs;
- /* The devx object of the minimum counter ID in the pool. */
- struct mlx5_counter_stats_raw *raw; /* The counter stats memory raw. */
- struct mlx5_flow_counter counters_raw[]; /* The counters memory. */
+ union {
+ struct mlx5_devx_obj *min_dcs;
+ rte_atomic64_t a64_dcs;
+ };
+ /* The devx object of the minimum counter ID. */
+ rte_atomic64_t query_gen; /* Query generation, bumped on each completed batch query. */
+ uint32_t n_counters: 16; /* Number of devx allocated counters. */
+ rte_spinlock_t sl; /* The pool lock. */
+ struct mlx5_counter_stats_raw *raw; /* The statistics visible to readers. */
+ struct mlx5_counter_stats_raw *raw_hw; /* The raw being filled by an in-flight HW query. */
+ struct mlx5_flow_counter counters_raw[]; /* The pool counters memory. */
};
struct mlx5_counter_stats_raw;
/* Container structure for counter pools. */
struct mlx5_pools_container {
- uint16_t n_valid; /* Number of valid pools. */
+ rte_atomic16_t n_valid; /* Number of valid pools. */
uint16_t n; /* Number of pools. */
struct mlx5_counter_pools pool_list; /* Counter pool list. */
struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
/* Counter global management structure. */
struct mlx5_flow_counter_mng {
- struct mlx5_pools_container ccont[2];
+ uint8_t mhi[2]; /* Master/host container index, one per counter type. */
+ struct mlx5_pools_container ccont[2 * 2];
+ /* Two containers per counter type (single and batch), double-buffered. */
struct mlx5_counters flow_counters; /* Legacy flow counter list. */
+ uint8_t pending_queries; /* Number of in-flight asynchronous batch queries. */
+ uint8_t batch; /* Container type (single/batch) to query next. */
+ uint16_t pool_index; /* Index of the next pool to query. */
+ uint8_t query_thread_on; /* Whether the periodic query alarm is armed. */
LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
+ LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws; /* Free raws for query results. */
};
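With this layout ccont[] holds four containers: indexes 0 and 2 are the double buffer for pools of single-allocated counters (batch == 0), indexes 1 and 3 for pools of batch-allocated counters (batch == 1). For each type, bit 0 of mhi[batch] selects the buffer owned by the allocation (master) side and bit 1 the one seen by the host query thread, so a resize can publish a new pools array without taking a lock against the asynchronous query path.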
/* Per port data of shared IB device. */
pthread_mutex_t intr_mutex; /* Interrupt config mutex. */
uint32_t intr_cnt; /* Interrupt handler reference counter. */
struct rte_intr_handle intr_handle; /* Interrupt handler for device. */
+ struct rte_intr_handle intr_handle_devx; /* DEVX interrupt handler. */
+ struct mlx5dv_devx_cmd_comp *devx_comp; /* DEVX async comp obj. */
struct mlx5_ibv_shared_port port[]; /* per device port data array. */
};
struct rte_pci_addr *pci_addr);
void mlx5_dev_link_status_handler(void *arg);
void mlx5_dev_interrupt_handler(void *arg);
+void mlx5_dev_interrupt_handler_devx(void *arg);
void mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev);
void mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev);
int mlx5_set_link_down(struct rte_eth_dev *dev);
struct rte_flow_item_eth *eth_mask);
int mlx5_flow_create_drop_queue(struct rte_eth_dev *dev);
void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev);
+void mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
+ uint64_t async_id, int status);
+void mlx5_set_query_alarm(struct mlx5_ibv_shared *sh);
+void mlx5_flow_query_alarm(void *arg);
/* mlx5_mp.c */
void mlx5_mp_req_start_rxtx(struct rte_eth_dev *dev);
int mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_obj *dcs,
int clear, uint32_t n_counters,
uint64_t *pkts, uint64_t *bytes,
- uint32_t mkey, void *addr);
+ uint32_t mkey, void *addr,
+ struct mlx5dv_devx_cmd_comp *cmd_comp,
+ uint64_t async_id);
int mlx5_devx_cmd_query_hca_attr(struct ibv_context *ctx,
struct mlx5_hca_attr *attr);
struct mlx5_devx_obj *mlx5_devx_cmd_mkey_create(struct ibv_context *ctx,
struct mlx5_devx_mkey_attr *attr);
+int mlx5_devx_get_out_command_status(void *out);
#endif /* RTE_PMD_MLX5_H_ */
* The mkey key for batch query.
* @param addr
* The address in the mkey range for batch query.
+ * @param cmd_comp
+ * The completion object for asynchronous batch query.
+ * @param async_id
+ * The ID to be returned in the asynchronous batch query response.
*
* @return
* 0 on success, a negative value otherwise.
*/
int
-mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_obj *dcs, int clear,
- uint32_t n_counters, uint64_t *pkts,
- uint64_t *bytes, uint32_t mkey, void *addr)
+mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_obj *dcs,
+ int clear, uint32_t n_counters,
+ uint64_t *pkts, uint64_t *bytes,
+ uint32_t mkey, void *addr,
+ struct mlx5dv_devx_cmd_comp *cmd_comp,
+ uint64_t async_id)
{
int out_len = MLX5_ST_SZ_BYTES(query_flow_counter_out) +
MLX5_ST_SZ_BYTES(traffic_counter);
MLX5_SET64(query_flow_counter_in, in, address,
(uint64_t)(uintptr_t)addr);
}
- rc = mlx5_glue->devx_obj_query(dcs->obj, in, sizeof(in), out, out_len);
+ if (!cmd_comp)
+ rc = mlx5_glue->devx_obj_query(dcs->obj, in, sizeof(in), out,
+ out_len);
+ else
+ rc = mlx5_glue->devx_obj_query_async(dcs->obj, in, sizeof(in),
+ out_len, async_id,
+ cmd_comp);
if (rc) {
DRV_LOG(ERR, "Failed to query devx counters with rc %d\n ", rc);
rte_errno = rc;
return mkey;
}
+/**
+ * Get status of devx command response.
+ * Mainly used for asynchronous commands.
+ *
+ * @param[in] out
+ * The out response buffer.
+ *
+ * @return
+ * 0 on success, non-zero value otherwise.
+ */
+int
+mlx5_devx_get_out_command_status(void *out)
+{
+ int status;
+
+ if (!out)
+ return -EINVAL;
+ status = MLX5_GET(query_flow_counter_out, out, status);
+ if (status) {
+ int syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
+
+ DRV_LOG(ERR, "Bad devX status %x, syndrome = %x\n", status,
+ syndrome);
+ }
+ return status;
+}
+
/**
* Destroy any object allocated by a Devx API.
*
} while (true);
}
+/**
+ * Handle DEVX interrupts from the NIC.
+ * This function is probably called from the DPDK host thread.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+void
+mlx5_dev_interrupt_handler_devx(void *cb_arg)
+{
+#ifndef HAVE_IBV_DEVX_ASYNC
+ (void)cb_arg;
+ return;
+#else
+ struct mlx5_ibv_shared *sh = cb_arg;
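+ /*
+ * Completion buffer layout: devx_get_async_cmd_comp fills the async
+ * command header and places the command output right after it, so buf
+ * below points past the header at the flow counter query output.
+ */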
+ union {
+ struct mlx5dv_devx_async_cmd_hdr cmd_resp;
+ uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter) +
+ sizeof(struct mlx5dv_devx_async_cmd_hdr)];
+ } out;
+ uint8_t *buf = out.buf + sizeof(out.cmd_resp);
+
+ while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
+ &out.cmd_resp,
+ sizeof(out.buf)))
+ mlx5_flow_async_pool_query_handle
+ (sh, (uint64_t)out.cmd_resp.wr_id,
+ mlx5_devx_get_out_command_status(buf));
+#endif /* HAVE_IBV_DEVX_ASYNC */
+}
+
/**
* Uninstall shared asynchronous device events handler.
* This function is implemented to support event sharing
mlx5_dev_interrupt_handler, sh);
sh->intr_handle.fd = 0;
sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ if (sh->intr_handle_devx.fd) {
+ rte_intr_callback_unregister(&sh->intr_handle_devx,
+ mlx5_dev_interrupt_handler_devx,
+ sh);
+ sh->intr_handle_devx.fd = 0;
+ sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN;
+ }
+ if (sh->devx_comp) {
+ mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
+ sh->devx_comp = NULL;
+ }
exit:
pthread_mutex_unlock(&sh->intr_mutex);
}
if (ret) {
DRV_LOG(INFO, "failed to change file descriptor"
" async event queue");
- /* Indicate there will be no interrupts. */
- dev->data->dev_conf.intr_conf.lsc = 0;
- dev->data->dev_conf.intr_conf.rmv = 0;
- sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
- goto exit;
+ goto error;
}
sh->intr_handle.fd = sh->ctx->async_fd;
sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&sh->intr_handle,
mlx5_dev_interrupt_handler, sh);
+ if (priv->config.devx) {
+#ifndef HAVE_IBV_DEVX_ASYNC
+ goto error_unregister;
+#else
+ sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
+ if (sh->devx_comp) {
+ flags = fcntl(sh->devx_comp->fd, F_GETFL);
+ ret = fcntl(sh->devx_comp->fd, F_SETFL,
+ flags | O_NONBLOCK);
+ if (ret) {
+ DRV_LOG(INFO, "failed to change file descriptor"
+ " devx async event queue");
+ goto error_unregister;
+ }
+ sh->intr_handle_devx.fd = sh->devx_comp->fd;
+ sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register
+ (&sh->intr_handle_devx,
+ mlx5_dev_interrupt_handler_devx, sh);
+ } else {
+ DRV_LOG(INFO, "failed to create devx async command "
+ "completion");
+ goto error_unregister;
+ }
+#endif /* HAVE_IBV_DEVX_ASYNC */
+ }
sh->intr_cnt++;
+ goto exit;
+error_unregister:
+ rte_intr_callback_unregister(&sh->intr_handle,
+ mlx5_dev_interrupt_handler, sh);
+error:
+ /* Indicate there will be no interrupts. */
+ dev->data->dev_conf.intr_conf.lsc = 0;
+ dev->data->dev_conf.intr_conf.rmv = 0;
+ sh->intr_handle.fd = 0;
+ sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
exit:
pthread_mutex_unlock(&sh->intr_mutex);
}
}
return 0;
}
+
+#define MLX5_POOL_QUERY_FREQ_US 1000000
+
+/**
+ * Set the periodic procedure for triggering asynchronous batch queries for all
+ * the counter pools.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_ibv_shared object.
+ */
+void
+mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
+{
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
+ uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
+ uint32_t us;
+
+ cont = MLX5_CNT_CONTAINER(sh, 1, 0);
+ pools_n += rte_atomic16_read(&cont->n_valid);
+ us = MLX5_POOL_QUERY_FREQ_US / pools_n;
+ DRV_LOG(DEBUG, "Set alarm for %u pools each %u us\n", pools_n, us);
+ if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
+ sh->cmng.query_thread_on = 0;
+ DRV_LOG(ERR, "Cannot reinitialize query alarm\n");
+ } else {
+ sh->cmng.query_thread_on = 1;
+ }
+}
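As a concrete example of the pacing: with MLX5_POOL_QUERY_FREQ_US at 1000000 and, say, four valid pools across both container types, the alarm fires every 250000 us and each mlx5_flow_query_alarm() pass triggers at most one pool query, so every pool gets refreshed roughly once per second regardless of the pool count.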
+
+/**
+ * The periodic procedure for triggering asynchronous batch queries for all the
+ * counter pools. This function is probably called by the host thread.
+ *
+ * @param[in] arg
+ * The parameter for the alarm process.
+ */
+void
+mlx5_flow_query_alarm(void *arg)
+{
+ struct mlx5_ibv_shared *sh = arg;
+ struct mlx5_devx_obj *dcs;
+ uint16_t offset;
+ int ret;
+ uint8_t batch = sh->cmng.batch;
+ uint16_t pool_index = sh->cmng.pool_index;
+ struct mlx5_pools_container *cont;
+ struct mlx5_pools_container *mcont;
+ struct mlx5_flow_counter_pool *pool;
+
+ if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
+ goto set_alarm;
+next_container:
+ cont = MLX5_CNT_CONTAINER(sh, batch, 1);
+ mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
+ /* Check if resize was done and need to flip a container. */
+ if (cont != mcont) {
+ if (cont->pools) {
+ /* Clean the old container. */
+ rte_free(cont->pools);
+ memset(cont, 0, sizeof(*cont));
+ }
+ rte_cio_wmb();
+ /* Flip the host container. */
+ sh->cmng.mhi[batch] ^= (uint8_t)2;
+ cont = mcont;
+ }
+ if (!cont->pools) {
+ /* Both containers being empty is an unexpected case. */
+ if (unlikely(batch != sh->cmng.batch))
+ goto set_alarm;
+ batch ^= 0x1;
+ pool_index = 0;
+ goto next_container;
+ }
+ pool = cont->pools[pool_index];
+ if (pool->raw_hw)
+ /* There is a pool query in progress. */
+ goto set_alarm;
+ pool->raw_hw =
+ LIST_FIRST(&sh->cmng.free_stat_raws);
+ if (!pool->raw_hw)
+ /* No free counter statistics raw memory. */
+ goto set_alarm;
+ dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
+ (&pool->a64_dcs);
+ offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
+ ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
+ offset, NULL, NULL,
+ pool->raw_hw->mem_mng->dm->id,
+ (void *)(uintptr_t)
+ (pool->raw_hw->data + offset),
+ sh->devx_comp,
+ (uint64_t)(uintptr_t)pool);
+ if (ret) {
+ DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
+ " %d\n", pool->min_dcs->id);
+ pool->raw_hw = NULL;
+ goto set_alarm;
+ }
+ pool->raw_hw->min_dcs_id = dcs->id;
+ LIST_REMOVE(pool->raw_hw, next);
+ sh->cmng.pending_queries++;
+ pool_index++;
+ if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
+ batch ^= 0x1;
+ pool_index = 0;
+ }
+set_alarm:
+ sh->cmng.batch = batch;
+ sh->cmng.pool_index = pool_index;
+ mlx5_set_query_alarm(sh);
+}
+
+/**
+ * Handler for the HW response carrying the ready values of an asynchronous
+ * batch query. This function is probably called by the host thread.
+ *
+ * @param[in] sh
+ * The pointer to the shared IB device context.
+ * @param[in] async_id
+ * The Devx async ID.
+ * @param[in] status
+ * The status of the completion.
+ */
+void
+mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
+ uint64_t async_id, int status)
+{
+ struct mlx5_flow_counter_pool *pool =
+ (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
+ struct mlx5_counter_stats_raw *raw_to_free;
+
+ if (unlikely(status)) {
+ raw_to_free = pool->raw_hw;
+ } else {
+ raw_to_free = pool->raw;
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = pool->raw_hw;
+ rte_spinlock_unlock(&pool->sl);
+ rte_atomic64_add(&pool->query_gen, 1);
+ /* Be sure the new raw counters data is updated in memory. */
+ rte_cio_wmb();
+ }
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
+ pool->raw_hw = NULL;
+ sh->cmng.pending_queries--;
+}
#pragma GCC diagnostic error "-Wpedantic"
#endif
+#include <rte_atomic.h>
+#include <rte_alarm.h>
+
#include "mlx5.h"
#include "mlx5_prm.h"
mlx5_flow_query_t query;
};
+#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \
+ [(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
+#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \
+ [(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
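Working through the index arithmetic: the container slot is ((mhi[batch] >> thread) & 0x1) * 2 + batch. For example, with mhi[1] == 0x2 the master view (thread == 0) of the batch containers resolves to slot 0 * 2 + 1 = 1, while the host-thread view (thread == 1) resolves to 1 * 2 + 1 = 3, so the two sides can temporarily observe different halves of the double buffer after a resize until mlx5_flow_query_alarm() flips the host bit. MLX5_CNT_CONTAINER_UNUSED simply selects the opposite half for the given thread.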
+
/* mlx5_flow.c */
uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
}
#define MLX5_CNT_CONTAINER_RESIZE 64
-#define MLX5_CNT_CONTAINER(priv, batch) (&(priv)->sh->cmng.ccont[batch])
/**
* Get a pool by a counter.
* Whether the pool is for counter that was allocated by batch command.
*
* @return
- * The container pointer on success, otherwise NULL and rte_errno is set.
+ * The new container pointer on success, otherwise NULL and rte_errno is set.
*/
static struct mlx5_pools_container *
flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
{
struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+ struct mlx5_pools_container *cont =
+ MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ struct mlx5_pools_container *new_cont =
+ MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
struct mlx5_counter_stats_mem_mng *mem_mng;
uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
- struct mlx5_flow_counter_pool **new_pools = rte_calloc(__func__, 1,
- mem_size, 0);
- if (!new_pools) {
+ int i;
+
+ if (cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+ /* The last resize has not been detected by the host thread yet. */
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+ new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
+ if (!new_cont->pools) {
rte_errno = ENOMEM;
return NULL;
}
+ if (cont->n)
+ memcpy(new_cont->pools, cont->pools, cont->n *
+ sizeof(struct mlx5_flow_counter_pool *));
mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
- MLX5_CNT_CONTAINER_RESIZE);
+ MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
if (!mem_mng) {
- rte_free(new_pools);
+ rte_free(new_cont->pools);
return NULL;
}
- if (cont->n) {
- memcpy(new_pools, cont->pools,
- cont->n * sizeof(struct mlx5_flow_counter_pool *));
- rte_free(cont->pools);
- } else {
- TAILQ_INIT(&cont->pool_list);
- }
- cont->pools = new_pools;
- cont->n = resize;
- cont->init_mem_mng = mem_mng;
- return cont;
+ for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+ LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
+ mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE +
+ i, next);
+ new_cont->n = resize;
+ rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid));
+ TAILQ_INIT(&new_cont->pool_list);
+ TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
+ new_cont->init_mem_mng = mem_mng;
+ rte_cio_wmb();
+ /* Flip the master container. */
+ priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+ return new_cont;
}
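Putting the two sides together: flow_dv_container_resize() builds the enlarged pools array in the unused half, copies the existing pool pointers into it, and only then flips bit 0 of mhi[batch], so the allocation path switches to the new container in one step. The host thread keeps using the old half until its next mlx5_flow_query_alarm() pass notices that its view differs from the master view, frees the stale pools array and flips bit 1 to catch up. The EAGAIN check above makes a second resize request wait for that acknowledgment instead of overwriting the half the host thread may still be reading.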
/**
{
struct mlx5_flow_counter_pool *pool =
flow_dv_counter_pool_get(cnt);
- uint16_t offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL;
- int ret = mlx5_devx_cmd_flow_counter_query
- (pool->min_dcs, 0, MLX5_COUNTERS_PER_POOL - offset, NULL,
- NULL, pool->raw->mem_mng->dm->id,
- (void *)(uintptr_t)(pool->raw->data +
- offset));
-
- if (ret) {
- DRV_LOG(ERR, "Failed to trigger synchronous"
- " query for dcs ID %d\n",
- pool->min_dcs->id);
- return ret;
+ int offset = cnt - &pool->counters_raw[0];
+
+ rte_spinlock_lock(&pool->sl);
+ /*
+ * A single-counter allocation may create a counter with an ID smaller
+ * than the minimum ID covered by the raw data the host thread queried
+ * last (raw->min_dcs_id). Such a counter is not present in the raw
+ * data yet, so its values must be reported as 0.
+ */
+ if (unlikely(!cnt->batch && cnt->dcs->id < pool->raw->min_dcs_id)) {
+ *pkts = 0;
+ *bytes = 0;
+ } else {
+ *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
+ *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
}
- offset = cnt - &pool->counters_raw[0];
- *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
- *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
+ rte_spinlock_unlock(&pool->sl);
return 0;
}
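On the application side the benefit is that reading a flow counter now returns values from this per-pool raw buffer instead of issuing a firmware query per call. A minimal sketch of such a read through the generic rte_flow API is shown below; the helper name and error handling are illustrative only::

    #include <stdint.h>
    #include <rte_flow.h>

    /* Read the hit/byte counters of a flow created with a COUNT action. */
    static int
    flow_counter_read(uint16_t port_id, struct rte_flow *flow,
                      uint64_t *hits, uint64_t *bytes)
    {
        struct rte_flow_query_count query = { .reset = 0 };
        const struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_COUNT },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;
        int ret = rte_flow_query(port_id, flow, actions, &query, &error);

        if (ret)
            return ret;
        *hits = query.hits;
        *bytes = query.bytes;
        return 0;
    }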
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+ 0);
+ int16_t n_valid = rte_atomic16_read(&cont->n_valid);
uint32_t size;
- if (cont->n == cont->n_valid) {
+ if (cont->n == n_valid) {
cont = flow_dv_container_resize(dev, batch);
if (!cont)
return NULL;
return NULL;
}
pool->min_dcs = dcs;
- pool->raw = cont->init_mem_mng->raws + cont->n_valid %
- MLX5_CNT_CONTAINER_RESIZE;
+ pool->raw = cont->init_mem_mng->raws + n_valid %
+ MLX5_CNT_CONTAINER_RESIZE;
+ pool->raw_hw = NULL;
+ rte_spinlock_init(&pool->sl);
+ /*
+ * Newly allocated counters in this pool have query_gen 0. Setting the
+ * pool generation to 2 makes all of them immediately valid for
+ * allocation (see the generation check in the allocation path).
+ */
+ rte_atomic64_set(&pool->query_gen, 0x2);
TAILQ_INIT(&pool->counters);
TAILQ_INSERT_TAIL(&cont->pool_list, pool, next);
- cont->pools[cont->n_valid] = pool;
- cont->n_valid++;
+ cont->pools[n_valid] = pool;
+ /* Pool initialization must be updated before host thread access. */
+ rte_cio_wmb();
+ rte_atomic16_add(&cont->n_valid, 1);
return pool;
}
dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
if (!dcs)
return NULL;
- pool = flow_dv_find_pool_by_id(MLX5_CNT_CONTAINER(priv, batch),
- dcs->id);
+ pool = flow_dv_find_pool_by_id
+ (MLX5_CNT_CONTAINER(priv->sh, batch, 0), dcs->id);
if (!pool) {
pool = flow_dv_pool_create(dev, dcs, batch);
if (!pool) {
return NULL;
}
} else if (dcs->id < pool->min_dcs->id) {
- pool->min_dcs->id = dcs->id;
+ rte_atomic64_set(&pool->a64_dcs,
+ (int64_t)(uintptr_t)dcs);
}
cnt = &pool->counters_raw[dcs->id % MLX5_COUNTERS_PER_POOL];
TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
* shared counters from the single container.
*/
uint32_t batch = (group && !shared) ? 1 : 0;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+ 0);
+#ifndef HAVE_IBV_DEVX_ASYNC
+ rte_errno = ENOTSUP;
+ return NULL;
+#endif
if (!priv->config.devx) {
rte_errno = ENOTSUP;
return NULL;
}
}
/* Pools which has a free counters are in the start. */
- pool = TAILQ_FIRST(&cont->pool_list);
- if (pool)
+ TAILQ_FOREACH(pool, &cont->pool_list, next) {
+ /*
+ * A freed counter's reset values must be refreshed between its
+ * release and its next allocation, so at least one batch query
+ * must complete in between. Ensure this by saving the pool query
+ * generation at release time and reusing the counter only after
+ * the pool generation has advanced past it.
+ * The free list is sorted by generation, so if the first counter
+ * is not updated yet, none of the later ones are either.
+ */
cnt_free = TAILQ_FIRST(&pool->counters);
+ if (cnt_free && cnt_free->query_gen + 1 <
+ rte_atomic64_read(&pool->query_gen))
+ break;
+ cnt_free = NULL;
+ }
if (!cnt_free) {
pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
if (!pool)
cnt_free->shared = shared;
cnt_free->ref_cnt = 1;
cnt_free->id = id;
+ if (!priv->sh->cmng.query_thread_on)
+ /* Start the asynchronous batch query by the host thread. */
+ mlx5_set_query_alarm(priv->sh);
TAILQ_REMOVE(&pool->counters, cnt_free, next);
if (TAILQ_EMPTY(&pool->counters)) {
/* Move the pool to the end of the container pool list. */
struct mlx5_flow_counter_pool *pool =
flow_dv_counter_pool_get(counter);
- /* Put the counter in the end - the earliest one. */
+ /* Put the counter in the end - the last updated one. */
TAILQ_INSERT_TAIL(&pool->counters, counter, next);
+ counter->query_gen = rte_atomic64_read(&pool->query_gen);
}
}
#endif
}
+static struct mlx5dv_devx_cmd_comp *
+mlx5_glue_devx_create_cmd_comp(struct ibv_context *ctx)
+{
+#ifdef HAVE_IBV_DEVX_ASYNC
+ return mlx5dv_devx_create_cmd_comp(ctx);
+#else
+ (void)ctx;
+ errno = -ENOTSUP;
+ return NULL;
+#endif
+}
+
+static void
+mlx5_glue_devx_destroy_cmd_comp(struct mlx5dv_devx_cmd_comp *cmd_comp)
+{
+#ifdef HAVE_IBV_DEVX_ASYNC
+ mlx5dv_devx_destroy_cmd_comp(cmd_comp);
+#else
+ (void)cmd_comp;
+ errno = -ENOTSUP;
+#endif
+}
+
+static int
+mlx5_glue_devx_obj_query_async(struct mlx5dv_devx_obj *obj, const void *in,
+ size_t inlen, size_t outlen, uint64_t wr_id,
+ struct mlx5dv_devx_cmd_comp *cmd_comp)
+{
+#ifdef HAVE_IBV_DEVX_ASYNC
+ return mlx5dv_devx_obj_query_async(obj, in, inlen, outlen, wr_id,
+ cmd_comp);
+#else
+ (void)obj;
+ (void)in;
+ (void)inlen;
+ (void)outlen;
+ (void)wr_id;
+ (void)cmd_comp;
+ return -ENOTSUP;
+#endif
+}
+
+static int
+mlx5_glue_devx_get_async_cmd_comp(struct mlx5dv_devx_cmd_comp *cmd_comp,
+ struct mlx5dv_devx_async_cmd_hdr *cmd_resp,
+ size_t cmd_resp_len)
+{
+#ifdef HAVE_IBV_DEVX_ASYNC
+ return mlx5dv_devx_get_async_cmd_comp(cmd_comp, cmd_resp,
+ cmd_resp_len);
+#else
+ (void)cmd_comp;
+ (void)cmd_resp;
+ (void)cmd_resp_len;
+ return -ENOTSUP;
+#endif
+}
+
static struct mlx5dv_devx_umem *
mlx5_glue_devx_umem_reg(struct ibv_context *context, void *addr, size_t size,
uint32_t access)
.devx_obj_query = mlx5_glue_devx_obj_query,
.devx_obj_modify = mlx5_glue_devx_obj_modify,
.devx_general_cmd = mlx5_glue_devx_general_cmd,
+ .devx_create_cmd_comp = mlx5_glue_devx_create_cmd_comp,
+ .devx_destroy_cmd_comp = mlx5_glue_devx_destroy_cmd_comp,
+ .devx_obj_query_async = mlx5_glue_devx_obj_query_async,
+ .devx_get_async_cmd_comp = mlx5_glue_devx_get_async_cmd_comp,
.devx_umem_reg = mlx5_glue_devx_umem_reg,
.devx_umem_dereg = mlx5_glue_devx_umem_dereg,
};
struct mlx5dv_devx_umem;
#endif
+#ifndef HAVE_IBV_DEVX_ASYNC
+struct mlx5dv_devx_cmd_comp;
+struct mlx5dv_devx_async_cmd_hdr;
+#endif
+
#ifndef HAVE_MLX5DV_DR
enum mlx5dv_dr_domain_type { unused, };
struct mlx5dv_dr_domain;
int (*devx_general_cmd)(struct ibv_context *context,
const void *in, size_t inlen,
void *out, size_t outlen);
+ struct mlx5dv_devx_cmd_comp *(*devx_create_cmd_comp)
+ (struct ibv_context *context);
+ void (*devx_destroy_cmd_comp)(struct mlx5dv_devx_cmd_comp *cmd_comp);
+ int (*devx_obj_query_async)(struct mlx5dv_devx_obj *obj,
+ const void *in, size_t inlen,
+ size_t outlen, uint64_t wr_id,
+ struct mlx5dv_devx_cmd_comp *cmd_comp);
+ int (*devx_get_async_cmd_comp)(struct mlx5dv_devx_cmd_comp *cmd_comp,
+ struct mlx5dv_devx_async_cmd_hdr *resp,
+ size_t cmd_resp_len);
struct mlx5dv_devx_umem *(*devx_umem_reg)(struct ibv_context *context,
void *addr, size_t size,
uint32_t access);