net/mlx5: use the new infrastructure for tc flow
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
index 4b51a85..b890aa2 100644 (file)
@@ -231,6 +231,23 @@ struct tc_pedit_sel {
 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
 #endif
 
+#ifndef TTL_LEN
+#define TTL_LEN 1 /* IPv4 TTL / IPv6 Hop Limit field length */
+#endif
+
+/**
+ * Structure for holding netlink context.
+ *
+ * Groups the libmnl socket, the running message sequence number and a
+ * message buffer so that they can be passed around as a single object.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+       struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+       uint32_t seq; /* Message sequence number, bumped by flow_tcf_nl_ack(). */
+       uint32_t buf_size; /* Message buffer size in bytes. */
+       uint8_t *buf; /* Message buffer of buf_size bytes. */
+};
+
 /** Empty masks for known item types. */
 static const union {
        struct rte_flow_item_port_id port_id;
@@ -319,12 +336,15 @@ struct flow_tcf_ptoi {
+/* Header rewrite actions that are carried as pedit keys. */
 #define MLX5_TCF_PEDIT_ACTIONS \
        (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
         MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
-        MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
+        MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
+        MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
+        MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
 
+/*
+ * Port-id, jump and VLAN actions plus every pedit action except
+ * MLX5_FLOW_ACTION_DEC_TTL.
+ * NOTE(review): presumably these are the actions that need a configuration
+ * structure (DEC_TTL carries none) - confirm against flow_tcf_validate().
+ */
 #define MLX5_TCF_CONFIG_ACTIONS \
        (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
         MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
-        MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | MLX5_TCF_PEDIT_ACTIONS)
+        MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+        (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
 
 #define MAX_PEDIT_KEYS 128
 #define SZ_PEDIT_KEY_VAL 4
@@ -344,6 +364,82 @@ struct pedit_parser {
 };
 
 
+/**
+ * Append the pedit keys that overwrite a MAC address.
+ *
+ * The address is longer than one pedit value (SZ_PEDIT_KEY_VAL bytes), hence
+ * two consecutive keys are filled: the first one takes the leading
+ * SZ_PEDIT_KEY_VAL bytes, the second one the remaining bytes. The key counter
+ * of the parser is advanced by two.
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
+                          struct pedit_parser *p_parser)
+{
+       const struct rte_flow_action_set_mac *conf =
+               (const struct rte_flow_action_set_mac *)actions->conf;
+       int head = p_parser->sel.nkeys;
+       int tail = head + 1;
+       uint32_t off;
+
+       /* Source or destination field of the Ethernet header. */
+       if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC)
+               off = offsetof(struct ether_hdr, s_addr);
+       else
+               off = offsetof(struct ether_hdr, d_addr);
+       /* First key: leading SZ_PEDIT_KEY_VAL bytes, all-zero mask. */
+       p_parser->keys_ex[head].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+       p_parser->keys_ex[head].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       p_parser->keys[head].off = off;
+       p_parser->keys[head].mask = ~UINT32_MAX;
+       memcpy(&p_parser->keys[head].val, conf->mac_addr, SZ_PEDIT_KEY_VAL);
+       /* Second key: the bytes left over after the first value. */
+       p_parser->keys_ex[tail].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+       p_parser->keys_ex[tail].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       p_parser->keys[tail].off = off + SZ_PEDIT_KEY_VAL;
+       p_parser->keys[tail].mask = 0xFFFF0000;
+       memcpy(&p_parser->keys[tail].val,
+              conf->mac_addr + SZ_PEDIT_KEY_VAL,
+              ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
+       p_parser->sel.nkeys = tail + 1;
+}
+
+/**
+ * Append the pedit key that sets or decrements the TTL / hop limit.
+ *
+ * One key is filled and the key counter of the parser is advanced by one.
+ * The header type and field offset are taken from the L3 layer found in
+ * @p item_flags; they are left untouched when neither IPv4 nor IPv6 is
+ * flagged.
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ * @param[in] item_flags
+ *   flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
+                               struct pedit_parser *p_parser,
+                               uint64_t item_flags)
+{
+       int cur = p_parser->sel.nkeys;
+
+       /* When both L3 flags are present the IPv6 settings are used. */
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
+               p_parser->keys_ex[cur].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+               p_parser->keys[cur].off =
+                       offsetof(struct ipv6_hdr, hop_limits);
+       } else if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
+               p_parser->keys_ex[cur].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+               p_parser->keys[cur].off =
+                       offsetof(struct ipv4_hdr, time_to_live);
+       }
+       p_parser->keys[cur].mask = 0xFFFFFF00;
+       if (actions->type != RTE_FLOW_ACTION_TYPE_DEC_TTL) {
+               const struct rte_flow_action_set_ttl *conf =
+                       (const struct rte_flow_action_set_ttl *)actions->conf;
+
+               p_parser->keys_ex[cur].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+               p_parser->keys[cur].val = (__u32)conf->ttl_value;
+       } else {
+               /*
+                * NOTE(review): adding 0xFF to the one-byte field presumably
+                * wraps around to a decrement by one - confirm against the
+                * kernel pedit ADD semantics.
+                */
+               p_parser->keys_ex[cur].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
+               p_parser->keys[cur].val = 0x000000FF;
+       }
+       p_parser->sel.nkeys = cur + 1;
+}
+
 /**
  * Set pedit key of transport (TCP/UDP) port value
  *
@@ -479,6 +575,15 @@ flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
                        flow_tcf_pedit_key_set_tp_port(*actions,
                                                        &p_parser, item_flags);
                        break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       flow_tcf_pedit_key_set_dec_ttl(*actions,
+                                                       &p_parser, item_flags);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       flow_tcf_pedit_key_set_mac(*actions, &p_parser);
+                       break;
                default:
                        goto pedit_mnl_msg_done;
                }
@@ -559,6 +664,22 @@ flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
                        keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
                        flags |= MLX5_FLOW_ACTION_SET_TP_DST;
                        break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+                       keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+                       flags |= MLX5_FLOW_ACTION_DEC_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
+                       break;
                default:
                        goto get_pedit_action_size_done;
                }
@@ -1096,6 +1217,18 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
                        current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
                        break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
+                       break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -1198,6 +1331,25 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
                                          "no fate action is found");
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
+               if (!(item_flags &
+                    (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+                     MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no IP found in pattern");
+       }
+       if (action_flags &
+           (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no ethernet found in"
+                                                 " pattern");
+       }
        return 0;
 }
 
@@ -1357,6 +1509,10 @@ action_of_vlan:
                case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
                case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
                case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
                        size += flow_tcf_get_pedit_actions_size(&actions,
                                                                &flags);
                        break;
@@ -1933,6 +2089,10 @@ override_na_vlan_priority:
                case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
                case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
                case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
                        na_act_index =
                                mnl_attr_nest_start(nlh, na_act_index_cur++);
                        flow_tcf_create_pedit_mnl_msg(nlh,
@@ -1956,8 +2116,8 @@ override_na_vlan_priority:
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Flow context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -1966,12 +2126,13 @@ override_na_vlan_priority:
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
 {
        alignas(struct nlmsghdr)
        uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
                    nlh->nlmsg_len - sizeof(*nlh)];
-       uint32_t seq = random();
+       uint32_t seq = ctx->seq++;
+       struct mnl_socket *nl = ctx->nl;
        int ret;
 
        nlh->nlmsg_flags |= NLM_F_ACK;
@@ -2006,7 +2167,7 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
               struct rte_flow_error *error)
 {
        struct priv *priv = dev->data->dev_private;
-       struct mnl_socket *nl = priv->mnl_socket;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
 
@@ -2016,7 +2177,7 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
        nlh = dev_flow->tcf.nlh;
        nlh->nlmsg_type = RTM_NEWTFILTER;
        nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-       if (!flow_tcf_nl_ack(nl, nlh))
+       if (!flow_tcf_nl_ack(ctx, nlh))
                return 0;
        return rte_flow_error_set(error, rte_errno,
                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -2035,7 +2196,7 @@ static void
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
        struct priv *priv = dev->data->dev_private;
-       struct mnl_socket *nl = priv->mnl_socket;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
 
@@ -2049,7 +2210,7 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
        nlh = dev_flow->tcf.nlh;
        nlh->nlmsg_type = RTM_DELTFILTER;
        nlh->nlmsg_flags = NLM_F_REQUEST;
-       flow_tcf_nl_ack(nl, nlh);
+       flow_tcf_nl_ack(ctx, nlh);
 }
 
 /**
@@ -2087,10 +2248,47 @@ const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+       struct mnl_socket *sock = mnl_socket_open(NETLINK_ROUTE);
+       int on = 1;
+
+       if (!sock) {
+               rte_errno = errno;
+               return NULL;
+       }
+       /* The result of enabling NETLINK_CAP_ACK is deliberately ignored. */
+       mnl_socket_setsockopt(sock, NETLINK_CAP_ACK, &on, sizeof(on));
+       if (mnl_socket_bind(sock, 0, MNL_SOCKET_AUTOPID)) {
+               /* Record errno before the socket gets closed. */
+               rte_errno = errno;
+               mnl_socket_close(sock);
+               return NULL;
+       }
+       return sock;
+}
+
+/**
+ * Destroy a libmnl socket.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+       /* Nothing to close when the socket was never created. */
+       if (!nl)
+               return;
+       mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param ctx
+ *   Pointer to tc-flower context to use.
  * @param ifindex
  *   Index of network interface to initialize.
  * @param[out] error
@@ -2100,8 +2298,8 @@ const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-                  struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+                  unsigned int ifindex, struct rte_flow_error *error)
 {
        struct nlmsghdr *nlh;
        struct tcmsg *tcm;
@@ -2118,7 +2316,7 @@ mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
        tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
        tcm->tcm_parent = TC_H_INGRESS;
        /* Ignore errors when qdisc is already absent. */
-       if (flow_tcf_nl_ack(nl, nlh) &&
+       if (flow_tcf_nl_ack(ctx, nlh) &&
            rte_errno != EINVAL && rte_errno != ENOENT)
                return rte_flow_error_set(error, rte_errno,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -2134,7 +2332,7 @@ mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
        tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
        tcm->tcm_parent = TC_H_INGRESS;
        mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-       if (flow_tcf_nl_ack(nl, nlh))
+       if (flow_tcf_nl_ack(ctx, nlh))
                return rte_flow_error_set(error, rte_errno,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                          "netlink: failed to create ingress"
@@ -2143,37 +2341,47 @@ mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
  *
  * @return
  *   A valid libmnl socket object pointer on success, NULL otherwise and
  *   rte_errno is set.
  */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
 {
-       struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-       if (nl) {
-               mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-                                     sizeof(int));
-               if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-                       return nl;
-       }
-       rte_errno = errno;
-       if (nl)
-               mnl_socket_close(nl);
+       struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+                                                       sizeof(*ctx),
+                                                       sizeof(uint32_t));
+       if (!ctx)
+               goto error;
+       ctx->nl = flow_tcf_mnl_socket_create();
+       if (!ctx->nl)
+               goto error;
+       ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+       ctx->buf = rte_zmalloc(__func__,
+                              ctx->buf_size, sizeof(uint32_t));
+       if (!ctx->buf)
+               goto error;
+       ctx->seq = random();
+       return ctx;
+error:
+       mlx5_flow_tcf_context_destroy(ctx);
        return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
  *
- * @param nl
+ * @param ctx
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
 {
-       mnl_socket_close(nl);
+       if (!ctx)
+               return;
+       flow_tcf_mnl_socket_destroy(ctx->nl);
+       rte_free(ctx->buf);
+       rte_free(ctx);
 }