From 8f848f32fc24f6db2dc4366b8f4111cdc3e67356 Mon Sep 17 00:00:00 2001 From: Viacheslav Ovsiienko Date: Thu, 16 Jul 2020 08:23:05 +0000 Subject: [PATCH] net/mlx5: introduce send scheduling devargs This patch introduces the new devargs: tx_pp - enables accurate packet send scheduling on mbuf timestamps in the PMD. On the device start if "rte_dynflag_timestamp" dynamic flag is registered and this devarg non-zero value is specified, the driver initializes all necessary internal infrastructure to provide packet scheduling. The parameter value specifies scheduling granularity in nanoseconds. tx_skew - the parameter adjusts the send packet scheduling on timestamps and represents the average delay between beginning of the transmitting descriptor processing by the hardware and appearance of actual packet data on the wire. The value should be provided in nanoseconds and is valid only if tx_pp parameter is specified. The default value is zero. Signed-off-by: Viacheslav Ovsiienko Acked-by: Matan Azrad --- doc/guides/nics/mlx5.rst | 37 +++++++++++++++++++ drivers/net/mlx5/linux/mlx5_os.c | 63 ++++++++++++++++++++++++++++++++ drivers/net/mlx5/mlx5.c | 39 ++++++++++++++++++-- drivers/net/mlx5/mlx5.h | 2 + 4 files changed, 138 insertions(+), 3 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 4b6d8fb4d5..9a57768370 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -241,6 +241,24 @@ Limitations reduce the requested Tx size or adjust data inline settings with ``txq_inline_max`` and ``txq_inline_mpw`` devargs keys. +- To provide the packet send scheduling on mbuf timestamps the ``tx_pp`` + parameter should be specified, RTE_MBUF_DYNFIELD_TIMESTAMP_NAME and + RTE_MBUF_DYNFLAG_TIMESTAMP_NAME should be registered by application. + When PMD sees the RTE_MBUF_DYNFLAG_TIMESTAMP_NAME set on the packet + being sent it tries to synchronize the time of packet appearing on + the wire with the specified packet timestamp. 
If the specified one + is in the past it should be ignored, if one is in the distant future + it should be capped with some reasonable value (in range of seconds). + These specific cases ("too late" and "distant future") can be optionally + reported via device xstats to assist applications to detect the + time-related problems. + + No packet reordering according to timestamps is performed, + neither within a packet burst nor between packets; it is entirely the + application's responsibility to generate packets and their timestamps + in the desired order. The timestamps can be put only in the first packet + in the burst providing the entire burst scheduling. + - E-Switch decapsulation Flow: - can be applied to PF port only. @@ -700,6 +718,25 @@ Driver options variable "MLX5_SHUT_UP_BF" value is used. If there is no "MLX5_SHUT_UP_BF", the default ``tx_db_nc`` value is zero for ARM64 hosts and one for others. +- ``tx_pp`` parameter [int] + + If a nonzero value is specified the driver creates all necessary internal + objects to provide accurate packet send scheduling on mbuf timestamps. + The positive value specifies the scheduling granularity in nanoseconds, + the packet send will be accurate up to specified digits. The allowed range is + from 500 to 1 million nanoseconds. A negative value specifies the granularity + by its absolute value and engages the special test mode to check the schedule rate. + By default (if the ``tx_pp`` is not specified) send scheduling on timestamps + feature is disabled. + +- ``tx_skew`` parameter [int] + + The parameter adjusts the send packet scheduling on timestamps and represents + the average delay between beginning of the transmitting descriptor processing + by the hardware and appearance of actual packet data on the wire. The value + should be provided in nanoseconds and is valid only if ``tx_pp`` parameter is + specified. The default value is zero. 
+ - ``tx_vec_en`` parameter [int] A nonzero value enables Tx vector on ConnectX-5, ConnectX-6, ConnectX-6 Dx diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 2dc57b20ef..14af468d6f 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -877,6 +877,69 @@ err_secondary: priv->mtr_color_reg); } } +#endif + } + if (config.tx_pp) { + DRV_LOG(DEBUG, "Timestamp counter frequency %u kHz", + config.hca_attr.dev_freq_khz); + DRV_LOG(DEBUG, "Packet pacing is %ssupported", + config.hca_attr.qos.packet_pacing ? "" : "not "); + DRV_LOG(DEBUG, "Cross channel ops are %ssupported", + config.hca_attr.cross_channel ? "" : "not "); + DRV_LOG(DEBUG, "WQE index ignore is %ssupported", + config.hca_attr.wqe_index_ignore ? "" : "not "); + DRV_LOG(DEBUG, "Non-wire SQ feature is %ssupported", + config.hca_attr.non_wire_sq ? "" : "not "); + DRV_LOG(DEBUG, "Static WQE SQ feature is %ssupported (%d)", + config.hca_attr.log_max_static_sq_wq ? "" : "not ", + config.hca_attr.log_max_static_sq_wq); + DRV_LOG(DEBUG, "WQE rate PP mode is %ssupported", + config.hca_attr.qos.wqe_rate_pp ? 
"" : "not "); + if (!config.devx) { + DRV_LOG(ERR, "DevX is required for packet pacing"); + err = ENODEV; + goto error; + } + if (!config.hca_attr.qos.packet_pacing) { + DRV_LOG(ERR, "Packet pacing is not supported"); + err = ENODEV; + goto error; + } + if (!config.hca_attr.cross_channel) { + DRV_LOG(ERR, "Cross channel operations are" + " required for packet pacing"); + err = ENODEV; + goto error; + } + if (!config.hca_attr.wqe_index_ignore) { + DRV_LOG(ERR, "WQE index ignore feature is" + " required for packet pacing"); + err = ENODEV; + goto error; + } + if (!config.hca_attr.non_wire_sq) { + DRV_LOG(ERR, "Non-wire SQ feature is" + " required for packet pacing"); + err = ENODEV; + goto error; + } + if (!config.hca_attr.log_max_static_sq_wq) { + DRV_LOG(ERR, "Static WQE SQ feature is" + " required for packet pacing"); + err = ENODEV; + goto error; + } + if (!config.hca_attr.qos.wqe_rate_pp) { + DRV_LOG(ERR, "WQE rate mode is required" + " for packet pacing"); + err = ENODEV; + goto error; + } +#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET + DRV_LOG(ERR, "DevX does not provide UAR offset," + " can't create queues for packet pacing"); + err = ENODEV; + goto error; #endif } if (config.mprq.enabled && mprq) { diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 0c654ed8b7..72e0870ebf 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -119,6 +119,19 @@ */ #define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len" +/* + * Device parameter to enable Tx scheduling on timestamps + * and specify the packet pacing granularity in nanoseconds. + */ +#define MLX5_TX_PP "tx_pp" + +/* + * Device parameter to specify skew in nanoseconds on Tx datapath, + * it represents the time between SQ start WQE processing and + * appearing actual packet data on the wire. + */ +#define MLX5_TX_SKEW "tx_skew" + /* * Device parameter to enable hardware Tx vector. * Deprecated, ignored (no vectorized Tx routines anymore). 
@@ -1271,18 +1284,26 @@ static int mlx5_args_check(const char *key, const char *val, void *opaque) { struct mlx5_dev_config *config = opaque; - unsigned long tmp; + unsigned long mod; + signed long tmp; /* No-op, port representors are processed in mlx5_dev_spawn(). */ if (!strcmp(MLX5_REPRESENTOR, key)) return 0; errno = 0; - tmp = strtoul(val, NULL, 0); + tmp = strtol(val, NULL, 0); if (errno) { rte_errno = errno; DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); return -rte_errno; } + if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) { + /* Negative values are acceptable for some keys only. */ + rte_errno = EINVAL; + DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); + return -rte_errno; + } + mod = tmp >= 0 ? tmp : -tmp; if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { config->cqe_comp = !!tmp; } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) { @@ -1333,6 +1354,15 @@ mlx5_args_check(const char *key, const char *val, void *opaque) config->txq_inline_mpw = tmp; } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); + } else if (strcmp(MLX5_TX_PP, key) == 0) { + if (!mod) { + DRV_LOG(ERR, "Zero Tx packet pacing parameter"); + rte_errno = EINVAL; + return -rte_errno; + } + config->tx_pp = tmp; + } else if (strcmp(MLX5_TX_SKEW, key) == 0) { + config->tx_skew = tmp; } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { config->rx_vec_en = !!tmp; } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) { @@ -1415,6 +1445,8 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) MLX5_TXQ_MPW_HDR_DSEG_EN, MLX5_TXQ_MAX_INLINE_LEN, MLX5_TX_DB_NC, + MLX5_TX_PP, + MLX5_TX_SKEW, MLX5_TX_VEC_EN, MLX5_RX_VEC_EN, MLX5_L3_VXLAN_EN, @@ -1693,7 +1725,8 @@ rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n) { static const char *const dynf_names[] = { RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, - RTE_MBUF_DYNFLAG_METADATA_NAME + RTE_MBUF_DYNFLAG_METADATA_NAME, + 
RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME }; unsigned int i; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 46e66eb1c6..84cd3e1252 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -240,6 +240,8 @@ struct mlx5_dev_config { int txq_inline_min; /* Minimal amount of data bytes to inline. */ int txq_inline_max; /* Max packet size for inlining with SEND. */ int txq_inline_mpw; /* Max packet size for inlining with eMPW. */ + int tx_pp; /* Timestamp scheduling granularity in nanoseconds. */ + int tx_skew; /* Tx scheduling skew between WQE and data on wire. */ struct mlx5_hca_attr hca_attr; /* HCA attributes. */ struct mlx5_lro_config lro; /* LRO configuration. */ }; -- 2.20.1