net/mlx5: support match on IPv4 fragment packets
author Dekel Peled <dekelp@nvidia.com>
Thu, 15 Oct 2020 14:05:54 +0000 (17:05 +0300)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 3 Nov 2020 21:29:25 +0000 (22:29 +0100)
This patch adds support in the MLX5 PMD for matching on fragmented
and non-fragmented IPv4 packets, using the IPv4 header
fragment_offset field.
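
For illustration, the capability can be exercised through the generic
rte_flow API. The sketch below is hypothetical application-side code,
not part of this patch; port setup and error handling are elided. It
creates a rule steering any fragmented IPv4 packet to queue 0, using
the spec 0x0001 / last 0x3fff / mask 0x3fff range that the new
validation logic accepts:

#include <rte_flow.h>
#include <rte_byteorder.h>

/* Hypothetical helper: match any IPv4 fragment (MF flag set and/or
 * fragment offset non-zero) and steer it to Rx queue 0.
 */
static struct rte_flow *
create_ipv4_frag_rule(uint16_t port_id, struct rte_flow_error *err)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_ipv4 ip_spec = {
		.hdr.fragment_offset = RTE_BE16(0x0001),
	};
	struct rte_flow_item_ipv4 ip_last = {
		.hdr.fragment_offset = RTE_BE16(0x3fff),
	};
	/* The full 0x3fff mask is required when matching a non-zero
	 * fragment_offset.
	 */
	struct rte_flow_item_ipv4 ip_mask = {
		.hdr.fragment_offset = RTE_BE16(0x3fff),
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
		  .spec = &ip_spec, .last = &ip_last, .mask = &ip_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}

Matching only non-fragmented packets would instead use a spec of 0 with
the same 0x3fff mask and no last value.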

Signed-off-by: Dekel Peled <dekelp@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
doc/guides/rel_notes/release_20_11.rst
drivers/net/mlx5/mlx5_flow.c
drivers/net/mlx5/mlx5_flow.h
drivers/net/mlx5/mlx5_flow_dv.c
drivers/net/mlx5/mlx5_flow_verbs.c

diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
index 1524f61..72bd8d0 100644
@@ -169,6 +169,12 @@ New Features
   Added the FEC PMD, which provides functions to query FEC capabilities and
   the current FEC mode from a device, and to configure the FEC mode.
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for matching on fragmented/non-fragmented IPv4 packets.
+
 * **Updated Solarflare network PMD.**
 
   Updated the Solarflare ``sfc_efx`` driver with changes including:
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 3d38e11..1116ebb 100644
@@ -800,6 +800,8 @@ mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
  *   Bit-masks covering supported fields by the NIC to compare with user mask.
  * @param[in] size
  *   Bit-masks size in bytes.
+ * @param[in] range_accepted
+ *   True if a range of values is accepted for specific fields, false otherwise.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -811,6 +813,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
                          unsigned int size,
+                         bool range_accepted,
                          struct rte_flow_error *error)
 {
        unsigned int i;
@@ -828,7 +831,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "mask/last without a spec is not"
                                          " supported");
-       if (item->spec && item->last) {
+       if (item->spec && item->last && !range_accepted) {
                uint8_t spec[size];
                uint8_t last[size];
                unsigned int i;
@@ -1603,7 +1606,8 @@ mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_icmp6_mask,
-                sizeof(struct rte_flow_item_icmp6), error);
+                sizeof(struct rte_flow_item_icmp6),
+                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -1661,7 +1665,8 @@ mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
-                sizeof(struct rte_flow_item_icmp), error);
+                sizeof(struct rte_flow_item_icmp),
+                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -1716,7 +1721,7 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_eth),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        return ret;
 }
 
@@ -1770,7 +1775,7 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_vlan),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret)
                return ret;
        if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
@@ -1822,6 +1827,8 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
  * @param[in] acc_mask
  *   Acceptable mask, if NULL default internal default mask
  *   will be used to check whether item fields are supported.
+ * @param[in] range_accepted
+ *   True if a range of values is accepted for specific fields, false otherwise.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -1834,6 +1841,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
                             uint64_t last_item,
                             uint16_t ether_type,
                             const struct rte_flow_item_ipv4 *acc_mask,
+                            bool range_accepted,
                             struct rte_flow_error *error)
 {
        const struct rte_flow_item_ipv4 *mask = item->mask;
@@ -1904,7 +1912,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
                                        acc_mask ? (const uint8_t *)acc_mask
                                                 : (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_ipv4),
-                                       error);
+                                       range_accepted, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -2003,7 +2011,7 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
                                        acc_mask ? (const uint8_t *)acc_mask
                                                 : (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_ipv6),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -2058,7 +2066,8 @@ mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_udp_mask,
-                sizeof(struct rte_flow_item_udp), error);
+                sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+                error);
        if (ret < 0)
                return ret;
        return 0;
@@ -2113,7 +2122,8 @@ mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)flow_mask,
-                sizeof(struct rte_flow_item_tcp), error);
+                sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+                error);
        if (ret < 0)
                return ret;
        return 0;
@@ -2167,7 +2177,7 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_vxlan_mask,
                 sizeof(struct rte_flow_item_vxlan),
-                error);
+                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        if (spec) {
@@ -2238,7 +2248,7 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
                 sizeof(struct rte_flow_item_vxlan_gpe),
-                error);
+                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        if (spec) {
@@ -2312,7 +2322,7 @@ mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&gre_key_default_mask,
-                sizeof(rte_be32_t), error);
+                sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        return ret;
 }
 
@@ -2364,7 +2374,8 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
-                sizeof(struct rte_flow_item_gre), error);
+                sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+                error);
        if (ret < 0)
                return ret;
 #ifndef HAVE_MLX5DV_DR
@@ -2439,7 +2450,8 @@ mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                                  (item, (const uint8_t *)mask,
                                   (const uint8_t *)&nic_mask,
-                                  sizeof(struct rte_flow_item_geneve), error);
+                                  sizeof(struct rte_flow_item_geneve),
+                                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret)
                return ret;
        if (spec) {
@@ -2522,7 +2534,8 @@ mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_mpls_mask,
-                sizeof(struct rte_flow_item_mpls), error);
+                sizeof(struct rte_flow_item_mpls),
+                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -2577,7 +2590,8 @@ mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_nvgre_mask,
-                sizeof(struct rte_flow_item_nvgre), error);
+                sizeof(struct rte_flow_item_nvgre),
+                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -2671,7 +2685,7 @@ mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
                                         acc_mask ? (const uint8_t *)acc_mask
                                                  : (const uint8_t *)&nic_mask,
                                         sizeof(struct rte_flow_item_ecpri),
-                                        error);
+                                        MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 }
 
 /* Allocate unique ID for the split Q/RSS subflows. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 0dc76c4..b6efbff 100644
@@ -331,6 +331,14 @@ enum mlx5_feature_name {
 #define MLX5_ENCAPSULATION_DECISION_SIZE (sizeof(struct rte_flow_item_eth) + \
                                          sizeof(struct rte_flow_item_ipv4))
 
+/* IPv4 fragment_offset field contains relevant data in bits 2 to 15. */
+#define MLX5_IPV4_FRAG_OFFSET_MASK \
+               (RTE_IPV4_HDR_OFFSET_MASK | RTE_IPV4_HDR_MF_FLAG)
+
+/* Some items' fields can accept a range of values (using spec and last). */
+#define MLX5_ITEM_RANGE_NOT_ACCEPTED   false
+#define MLX5_ITEM_RANGE_ACCEPTED       true
+
 /* Software header modify action numbers of a flow. */
 #define MLX5_ACT_NUM_MDF_IPV4          1
 #define MLX5_ACT_NUM_MDF_IPV6          4
@@ -1046,6 +1054,7 @@ int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                              const uint8_t *mask,
                              const uint8_t *nic_mask,
                              unsigned int size,
+                             bool range_accepted,
                              struct rte_flow_error *error);
 int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
                                uint64_t item_flags,
@@ -1063,6 +1072,7 @@ int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
                                 uint64_t last_item,
                                 uint16_t ether_type,
                                 const struct rte_flow_item_ipv4 *acc_mask,
+                                bool range_accepted,
                                 struct rte_flow_error *error);
 int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
                                 uint64_t item_flags,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 3fa51c3..ff97f78 100644
@@ -1426,7 +1426,7 @@ flow_dv_validate_item_mark(struct rte_eth_dev *dev,
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_mark),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        return 0;
@@ -1502,7 +1502,7 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev __rte_unused,
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_meta),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        return ret;
 }
 
@@ -1555,7 +1555,7 @@ flow_dv_validate_item_tag(struct rte_eth_dev *dev,
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_tag),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret < 0)
                return ret;
        if (mask->index != 0xff)
@@ -1626,7 +1626,7 @@ flow_dv_validate_item_port_id(struct rte_eth_dev *dev,
                                (item, (const uint8_t *)mask,
                                 (const uint8_t *)&rte_flow_item_port_id_mask,
                                 sizeof(struct rte_flow_item_port_id),
-                                error);
+                                MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret)
                return ret;
        if (!spec)
@@ -1699,7 +1699,7 @@ flow_dv_validate_item_vlan(const struct rte_flow_item *item,
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_vlan),
-                                       error);
+                                       MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
        if (ret)
                return ret;
        if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
@@ -1786,11 +1786,126 @@ flow_dv_validate_item_gtp(struct rte_eth_dev *dev,
                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "Match is supported for GTP"
                                          " flags only");
-       return mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&nic_mask,
-                sizeof(struct rte_flow_item_gtp),
-                error);
+       return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+                                        (const uint8_t *)&nic_mask,
+                                        sizeof(struct rte_flow_item_gtp),
+                                        MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
+}
+
+/**
+ * Validate IPv4 item.
+ * Use the existing validation function mlx5_flow_validate_item_ipv4(), and
+ * add specific validation of the fragment_offset field.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
+ * @param[in] last_item
+ *   Previous validated item in the pattern items.
+ * @param[in] ether_type
+ *   Type in the ethernet layer header (including dot1q).
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_ipv4(const struct rte_flow_item *item,
+                          uint64_t item_flags,
+                          uint64_t last_item,
+                          uint16_t ether_type,
+                          struct rte_flow_error *error)
+{
+       int ret;
+       const struct rte_flow_item_ipv4 *spec = item->spec;
+       const struct rte_flow_item_ipv4 *last = item->last;
+       const struct rte_flow_item_ipv4 *mask = item->mask;
+       rte_be16_t fragment_offset_spec = 0;
+       rte_be16_t fragment_offset_last = 0;
+       const struct rte_flow_item_ipv4 nic_ipv4_mask = {
+               .hdr = {
+                       .src_addr = RTE_BE32(0xffffffff),
+                       .dst_addr = RTE_BE32(0xffffffff),
+                       .type_of_service = 0xff,
+                       .fragment_offset = RTE_BE16(0xffff),
+                       .next_proto_id = 0xff,
+                       .time_to_live = 0xff,
+               },
+       };
+
+       ret = mlx5_flow_validate_item_ipv4(item, item_flags, last_item,
+                                          ether_type, &nic_ipv4_mask,
+                                          MLX5_ITEM_RANGE_ACCEPTED, error);
+       if (ret < 0)
+               return ret;
+       if (spec && mask)
+               fragment_offset_spec = spec->hdr.fragment_offset &
+                                      mask->hdr.fragment_offset;
+       if (!fragment_offset_spec)
+               return 0;
+       /*
+        * spec and mask are valid; enforce use of the full mask to make sure
+        * the complete field value is matched.
+        */
+       if ((mask->hdr.fragment_offset & RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))
+                       != RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                         item, "must use full mask for"
+                                         " fragment_offset");
+       /*
+        * Match on fragment_offset 0x2000 means MF is 1 and frag-offset is 0,
+        * indicating this is the first fragment of a fragmented packet.
+        * This is not yet supported in MLX5; return an appropriate error.
+        */
+       if (fragment_offset_spec == RTE_BE16(RTE_IPV4_HDR_MF_FLAG))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "match on first fragment not "
+                                         "supported");
+       if (fragment_offset_spec && !last)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "specified value not supported");
+       /* spec and last are valid; validate the specified range. */
+       fragment_offset_last = last->hdr.fragment_offset &
+                              mask->hdr.fragment_offset;
+       /*
+        * Match on fragment_offset spec 0x2001 and last 0x3fff
+        * means MF is 1 and frag-offset is > 0.
+        * Such a packet is any fragment from the second onward, excluding
+        * the last. This is not yet supported in MLX5; return an
+        * appropriate error.
+        */
+       if (fragment_offset_spec == RTE_BE16(RTE_IPV4_HDR_MF_FLAG + 1) &&
+           fragment_offset_last == RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+                                         last, "match on following "
+                                         "fragments not supported");
+       /*
+        * Match on fragment_offset spec 0x0001 and last 0x1fff
+        * means MF is 0 and frag-offset is > 0.
+        * Such a packet is the last fragment of a fragmented packet.
+        * This is not yet supported in MLX5; return an appropriate
+        * error.
+        */
+       if (fragment_offset_spec == RTE_BE16(1) &&
+           fragment_offset_last == RTE_BE16(RTE_IPV4_HDR_OFFSET_MASK))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+                                         last, "match on last "
+                                         "fragment not supported");
+       /*
+        * Match on fragment_offset spec 0x0001 and last 0x3fff
+        * means MF and/or frag-offset is not 0.
+        * This is a fragmented packet.
+        * Other range values are invalid and rejected.
+        */
+       if (!(fragment_offset_spec == RTE_BE16(1) &&
+             fragment_offset_last == RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK)))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_LAST, last,
+                                         "specified range not supported");
+       return 0;
 }
 
 /**
@@ -5290,15 +5405,6 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
                        .dst_port = RTE_BE16(UINT16_MAX),
                }
        };
-       const struct rte_flow_item_ipv4 nic_ipv4_mask = {
-               .hdr = {
-                       .src_addr = RTE_BE32(0xffffffff),
-                       .dst_addr = RTE_BE32(0xffffffff),
-                       .type_of_service = 0xff,
-                       .next_proto_id = 0xff,
-                       .time_to_live = 0xff,
-               },
-       };
        const struct rte_flow_item_ipv6 nic_ipv6_mask = {
                .hdr = {
                        .src_addr =
@@ -5398,11 +5504,9 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
                case RTE_FLOW_ITEM_TYPE_IPV4:
                        mlx5_flow_tunnel_ip_check(items, next_protocol,
                                                  &item_flags, &tunnel);
-                       ret = mlx5_flow_validate_item_ipv4(items, item_flags,
-                                                          last_item,
-                                                          ether_type,
-                                                          &nic_ipv4_mask,
-                                                          error);
+                       ret = flow_dv_validate_item_ipv4(items, item_flags,
+                                                        last_item, ether_type,
+                                                        error);
                        if (ret < 0)
                                return ret;
                        last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
@@ -6511,6 +6615,10 @@ flow_dv_translate_item_ipv4(void *matcher, void *key,
                 ipv4_m->hdr.time_to_live);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ttl_hoplimit,
                 ipv4_v->hdr.time_to_live & ipv4_m->hdr.time_to_live);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, frag,
+                !!(ipv4_m->hdr.fragment_offset));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+                !!(ipv4_v->hdr.fragment_offset & ipv4_m->hdr.fragment_offset));
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 62c18b8..276bcb5 100644
@@ -1312,10 +1312,11 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                        }
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
-                       ret = mlx5_flow_validate_item_ipv4(items, item_flags,
-                                                          last_item,
-                                                          ether_type, NULL,
-                                                          error);
+                       ret = mlx5_flow_validate_item_ipv4
+                                               (items, item_flags,
+                                                last_item, ether_type, NULL,
+                                                MLX5_ITEM_RANGE_NOT_ACCEPTED,
+                                                error);
                        if (ret < 0)
                                return ret;
                        last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :