#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
#endif
+#ifndef TTL_LEN
+#define TTL_LEN 1
+#endif
+
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+ struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+ uint32_t seq; /* Message sequence number. */
+ uint32_t buf_size; /* Message buffer size. */
+ uint8_t *buf; /* Message buffer. */
+};
+
/** Empty masks for known item types. */
static const union {
struct rte_flow_item_port_id port_id;
#define MLX5_TCF_PEDIT_ACTIONS \
(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
- MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
+ MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
+ MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
+ MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
#define MLX5_TCF_CONFIG_ACTIONS \
(MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
- MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | MLX5_TCF_PEDIT_ACTIONS)
+ MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+ (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
#define MAX_PEDIT_KEYS 128
#define SZ_PEDIT_KEY_VAL 4
};
+/**
+ * Set pedit key of MAC address
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser)
+{
+ int idx = p_parser->sel.nkeys;
+ uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
+ offsetof(struct ether_hdr, s_addr) :
+ offsetof(struct ether_hdr, d_addr);
+ const struct rte_flow_action_set_mac *conf =
+ (const struct rte_flow_action_set_mac *)actions->conf;
+
+ p_parser->keys[idx].off = off;
+ p_parser->keys[idx].mask = ~UINT32_MAX;
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ memcpy(&p_parser->keys[idx].val,
+ conf->mac_addr, SZ_PEDIT_KEY_VAL);
+ idx++;
+ p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
+ p_parser->keys[idx].mask = 0xFFFF0000;
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ memcpy(&p_parser->keys[idx].val,
+ conf->mac_addr + SZ_PEDIT_KEY_VAL,
+ ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
+ p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of decrease/set ttl
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ * @param[in] item_flags
+ * flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser,
+ uint64_t item_flags)
+{
+ int idx = p_parser->sel.nkeys;
+
+ p_parser->keys[idx].mask = 0xFFFFFF00;
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+ p_parser->keys[idx].off =
+ offsetof(struct ipv4_hdr, time_to_live);
+ }
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+ p_parser->keys[idx].off =
+ offsetof(struct ipv6_hdr, hop_limits);
+ }
+ if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
+ p_parser->keys[idx].val = 0x000000FF;
+ } else {
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ p_parser->keys[idx].val =
+ (__u32)((const struct rte_flow_action_set_ttl *)
+ actions->conf)->ttl_value;
+ }
+ p_parser->sel.nkeys = (++idx);
+}
+
/**
* Set pedit key of transport (TCP/UDP) port value
*
flow_tcf_pedit_key_set_tp_port(*actions,
&p_parser, item_flags);
break;
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ flow_tcf_pedit_key_set_dec_ttl(*actions,
+ &p_parser, item_flags);
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ flow_tcf_pedit_key_set_mac(*actions, &p_parser);
+ break;
default:
goto pedit_mnl_msg_done;
}
keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
flags |= MLX5_FLOW_ACTION_SET_TP_DST;
break;
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+ flags |= MLX5_FLOW_ACTION_DEC_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
+ break;
default:
goto get_pedit_action_size_done;
}
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
break;
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
+ break;
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
"no fate action is found");
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
+ if (!(item_flags &
+ (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no IP found in pattern");
+ }
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no ethernet found in"
+ " pattern");
+ }
return 0;
}
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
size += flow_tcf_get_pedit_actions_size(&actions,
&flags);
break;
case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
na_act_index =
mnl_attr_nest_start(nlh, na_act_index_cur++);
flow_tcf_create_pedit_mnl_msg(nlh,
/**
* Send Netlink message with acknowledgment.
*
- * @param nl
- * Libmnl socket to use.
+ * @param ctx
+ * Flow context to use.
* @param nlh
* Message to send. This function always raises the NLM_F_ACK flag before
* sending.
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
{
alignas(struct nlmsghdr)
uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
nlh->nlmsg_len - sizeof(*nlh)];
- uint32_t seq = random();
+ uint32_t seq = ctx->seq++;
+ struct mnl_socket *nl = ctx->nl;
int ret;
nlh->nlmsg_flags |= NLM_F_ACK;
struct rte_flow_error *error)
{
struct priv *priv = dev->data->dev_private;
- struct mnl_socket *nl = priv->mnl_socket;
+ struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_NEWTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
- if (!flow_tcf_nl_ack(nl, nlh))
+ if (!flow_tcf_nl_ack(ctx, nlh))
return 0;
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
struct priv *priv = dev->data->dev_private;
- struct mnl_socket *nl = priv->mnl_socket;
+ struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
struct mlx5_flow *dev_flow;
struct nlmsghdr *nlh;
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_DELTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST;
- flow_tcf_nl_ack(nl, nlh);
+ flow_tcf_nl_ack(ctx, nlh);
}
/**
};
/**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ * A valid libmnl socket object pointer on success, NULL otherwise and
+ * rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+ struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+ if (nl) {
+ mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+ sizeof(int));
+ if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+ return nl;
+ }
+ rte_errno = errno;
+ if (nl)
+ mnl_socket_close(nl);
+ return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
*
* @param nl
* Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+ if (nl)
+ mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param ctx
+ * Pointer to tc-flower context to use.
* @param ifindex
* Index of network interface to initialize.
* @param[out] error
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
- struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+ unsigned int ifindex, struct rte_flow_error *error)
{
struct nlmsghdr *nlh;
struct tcmsg *tcm;
tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
tcm->tcm_parent = TC_H_INGRESS;
/* Ignore errors when qdisc is already absent. */
- if (flow_tcf_nl_ack(nl, nlh) &&
+ if (flow_tcf_nl_ack(ctx, nlh) &&
rte_errno != EINVAL && rte_errno != ENOENT)
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
tcm->tcm_parent = TC_H_INGRESS;
mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
- if (flow_tcf_nl_ack(nl, nlh))
+ if (flow_tcf_nl_ack(ctx, nlh))
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to create ingress"
}
/**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
*
* @return
* A valid libmnl socket object pointer on success, NULL otherwise and
* rte_errno is set.
*/
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
{
- struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
- if (nl) {
- mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
- sizeof(int));
- if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
- return nl;
- }
- rte_errno = errno;
- if (nl)
- mnl_socket_close(nl);
+ struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+ sizeof(*ctx),
+ sizeof(uint32_t));
+ if (!ctx)
+ goto error;
+ ctx->nl = flow_tcf_mnl_socket_create();
+ if (!ctx->nl)
+ goto error;
+ ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+ ctx->buf = rte_zmalloc(__func__,
+ ctx->buf_size, sizeof(uint32_t));
+ if (!ctx->buf)
+ goto error;
+ ctx->seq = random();
+ return ctx;
+error:
+ mlx5_flow_tcf_context_destroy(ctx);
return NULL;
}
/**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
*
- * @param nl
+ * @param ctx
* Libmnl socket of the @p NETLINK_ROUTE kind.
*/
void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
{
- mnl_socket_close(nl);
+ if (!ctx)
+ return;
+ flow_tcf_mnl_socket_destroy(ctx->nl);
+ rte_free(ctx->buf);
+ rte_free(ctx);
}