diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 67a2e4b3a7..fb284c3558 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -160,6 +160,9 @@ struct tc_tunnel_key {
 #ifndef TCA_CLS_FLAGS_SKIP_SW
 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
 #endif
+#ifndef TCA_CLS_FLAGS_IN_HW
+#define TCA_CLS_FLAGS_IN_HW (1 << 2)
+#endif
 #ifndef HAVE_TCA_CHAIN
 #define TCA_CHAIN 11
 #endif
@@ -530,7 +533,15 @@ struct flow_tcf_ptoi {
 
 /* Due to a limitation on driver/FW. */
 #define MLX5_TCF_GROUP_ID_MAX 3
-#define MLX5_TCF_GROUP_PRIORITY_MAX 14
+
+/*
+ * Due to a driver/FW limitation, kernel priority ranges from 1 to 16.
+ * The priority in the rte_flow attribute starts from 0 and is
+ * incremented by 1 during translation. This is subject to change: the
+ * maximum priority may be determined by trial and error, as the Verbs
+ * driver does, once the restriction is lifted or the range is extended.
+ */
+#define MLX5_TCF_GROUP_PRIORITY_MAX 15
 
 #define MLX5_TCF_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
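
A minimal sketch (not part of the patch, helper name hypothetical) of the
priority mapping the new comment describes: rte_flow priorities
0..MLX5_TCF_GROUP_PRIORITY_MAX (15) translate to kernel TC priorities 1..16.

	static inline uint32_t
	mlx5_tcf_prio_example(uint32_t rte_flow_prio)
	{
		/* The +1 translation described in the comment above. */
		return rte_flow_prio + 1; /* e.g. 0 -> 1, 15 -> 16 */
	}
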
@@ -1091,19 +1102,13 @@ flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
 						  "group ID larger than "
 						  RTE_STR(MLX5_TCF_GROUP_ID_MAX)
 						  " isn't supported");
-	else if (attr->group > 0 &&
-		 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
+	else if (attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
 					  attr,
-					  "lowest priority level is "
+					  "priority more than "
 					  RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
-					  " when group is configured");
-	else if (attr->priority > 0xfffe)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-					  attr,
-					  "lowest priority level is 0xfffe");
+					  " is not supported");
 	if (!attr->ingress)
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
@@ -1882,9 +1887,23 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 		current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 		break;
-	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: {
+		rte_be16_t ethertype;
+
 		current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+		if (!actions->conf)
+			break;
+		conf.of_push_vlan = actions->conf;
+		ethertype = conf.of_push_vlan->ethertype;
+		if (ethertype != RTE_BE16(ETH_P_8021Q) &&
+		    ethertype != RTE_BE16(ETH_P_8021AD))
+			return rte_flow_error_set
+				(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+				 "vlan push TPID must be "
+				 "802.1Q or 802.1AD");
 		break;
+	}
 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
 		if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
 			return rte_flow_error_set
@@ -2368,24 +2387,21 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 }
 
 /**
- * Calculate maximum size of memory for flow items of Linux TC flower and
- * extract specified items.
+ * Calculate maximum size of memory for flow items of Linux TC flower.
  *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
  * @param[in] items
  *   Pointer to the list of items.
- * @param[out] item_flags
- *   Pointer to the detected items.
  *
  * @return
  *   Maximum size of memory for items.
 */
 static int
-flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
-			    const struct rte_flow_item items[],
-			    uint64_t *item_flags)
+flow_tcf_get_items_size(const struct rte_flow_attr *attr,
+			const struct rte_flow_item items[])
 {
 	int size = 0;
-	uint64_t flags = 0;
 
 	size += SZ_NLATTR_STRZ_OF("flower") +
 		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
@@ -2402,7 +2418,6 @@ flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
 			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
 				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
 				/* dst/src MAC addr and mask. */
-			flags |= MLX5_FLOW_LAYER_OUTER_L2;
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
 			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
@@ -2410,37 +2425,31 @@ flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
 				/* VLAN Ether type. */
 				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
 				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
-			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
 			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
 				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
 				/* dst/src IP addr and mask. */
-			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
 			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
 				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
 				/* dst/src IP addr and mask. */
-			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
 				/* dst/src port and mask. */
-			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
 			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
 				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
 				/* dst/src port and mask. */
-			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
 			size += SZ_NLATTR_TYPE_OF(uint32_t);
-			flags |= MLX5_FLOW_LAYER_VXLAN;
 			break;
 		default:
 			DRV_LOG(WARNING,
@@ -2450,7 +2459,6 @@ flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
 			break;
 		}
 	}
-	*item_flags = flags;
 	return size;
 }
 
@@ -2666,10 +2674,6 @@ flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
- * @param[out] item_flags
- *   Pointer to bit mask of all items detected.
- * @param[out] action_flags
- *   Pointer to bit mask of all actions detected.
 * @param[out] error
 *   Pointer to the error structure.
* @@ -2681,7 +2685,6 @@ static struct mlx5_flow * flow_tcf_prepare(const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], - uint64_t *item_flags, uint64_t *action_flags, struct rte_flow_error *error) { size_t size = RTE_ALIGN_CEIL @@ -2690,12 +2693,13 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, MNL_ALIGN(sizeof(struct nlmsghdr)) + MNL_ALIGN(sizeof(struct tcmsg)); struct mlx5_flow *dev_flow; + uint64_t action_flags = 0; struct nlmsghdr *nlh; struct tcmsg *tcm; uint8_t *sp, *tun = NULL; - size += flow_tcf_get_items_and_size(attr, items, item_flags); - size += flow_tcf_get_actions_and_size(actions, action_flags); + size += flow_tcf_get_items_size(attr, items); + size += flow_tcf_get_actions_and_size(actions, &action_flags); dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO); if (!dev_flow) { rte_flow_error_set(error, ENOMEM, @@ -2704,7 +2708,7 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, return NULL; } sp = (uint8_t *)(dev_flow + 1); - if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) { + if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) { sp = RTE_PTR_ALIGN (sp, alignof(struct flow_tcf_tunnel_hdr)); tun = sp; @@ -2716,7 +2720,7 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, (sizeof(struct flow_tcf_vxlan_encap), MNL_ALIGNTO); #endif - } else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { + } else if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { sp = RTE_PTR_ALIGN (sp, alignof(struct flow_tcf_tunnel_hdr)); tun = sp; @@ -2745,9 +2749,9 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, .tcm = tcm, }, }; - if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) + if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP; - else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) + else if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP; /* * Generate a reasonably unique handle based on the address of the @@ -3256,7 +3260,8 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, assert(mask.ipv4); spec.ipv4 = items->spec; if (!decap.vxlan) { - if (!eth_type_set && !vlan_eth_type_set) + if (!eth_type_set || + (!vlan_eth_type_set && vlan_present)) mnl_attr_put_u16 (nlh, vlan_present ? @@ -3313,14 +3318,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, assert(mask.ipv6); spec.ipv6 = items->spec; if (!decap.vxlan) { - if (!eth_type_set || !vlan_eth_type_set) { + if (!eth_type_set || + (!vlan_eth_type_set && vlan_present)) mnl_attr_put_u16 (nlh, vlan_present ? TCA_FLOWER_KEY_VLAN_ETH_TYPE : TCA_FLOWER_KEY_ETH_TYPE, RTE_BE16(ETH_P_IPV6)); - } eth_type_set = 1; vlan_eth_type_set = 1; if (mask.ipv6 == &flow_tcf_mask_empty.ipv6) @@ -3712,6 +3717,8 @@ override_na_vlan_priority: assert(na_flower); assert(na_flower_act); mnl_attr_nest_end(nlh, na_flower_act); + dev_flow->tcf.ptc_flags = mnl_attr_get_payload + (mnl_nlmsg_get_payload_tail(nlh)); mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ? 0 : TCA_CLS_FLAGS_SKIP_SW); mnl_attr_nest_end(nlh, na_flower); @@ -3730,10 +3737,6 @@ override_na_vlan_priority: * @param nlh * Message to send. This function always raises the NLM_F_ACK flag before * sending. - * @param[in] msglen - * Message length. Message buffer may contain multiple commands and - * nlmsg_len field not always corresponds to actual message length. - * If 0 specified the nlmsg_len field in header is used as message length. 
 * @param[in] cb
 *   Callback handler for received message.
 * @param[in] arg
@@ -3745,52 +3748,64 @@ override_na_vlan_priority:
 static int
 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *tcf,
 		struct nlmsghdr *nlh,
-		uint32_t msglen,
 		mnl_cb_t cb, void *arg)
 {
 	unsigned int portid = mnl_socket_get_portid(tcf->nl);
 	uint32_t seq = tcf->seq++;
-	int err, ret;
+	int ret, err = 0;
 
 	assert(tcf->nl);
 	assert(tcf->buf);
-	if (!seq)
+	if (!seq) {
 		/* seq 0 is reserved for kernel event-driven notifications. */
 		seq = tcf->seq++;
+	}
 	nlh->nlmsg_seq = seq;
-	if (!msglen) {
-		msglen = nlh->nlmsg_len;
-		nlh->nlmsg_flags |= NLM_F_ACK;
+	nlh->nlmsg_flags |= NLM_F_ACK;
+	ret = mnl_socket_sendto(tcf->nl, nlh, nlh->nlmsg_len);
+	if (ret <= 0) {
+		/* Message send error occurred. */
+		rte_errno = errno;
+		return -rte_errno;
 	}
-	ret = mnl_socket_sendto(tcf->nl, nlh, msglen);
-	err = (ret <= 0) ? errno : 0;
 	nlh = (struct nlmsghdr *)(tcf->buf);
 	/*
 	 * The following loop postpones non-fatal errors until multipart
 	 * messages are complete.
 	 */
-	if (ret > 0)
-		while (true) {
-			ret = mnl_socket_recvfrom(tcf->nl, tcf->buf,
-						  tcf->buf_size);
+	while (true) {
+		ret = mnl_socket_recvfrom(tcf->nl, tcf->buf, tcf->buf_size);
+		if (ret < 0) {
+			err = errno;
+			/*
+			 * In case of overflow we keep receiving to the
+			 * end of the multipart message. Part of the
+			 * reply may be lost, so record and return an error.
+			 */
+			if (err != ENOSPC ||
+			    !(nlh->nlmsg_flags & NLM_F_MULTI) ||
+			    nlh->nlmsg_type == NLMSG_DONE)
+				break;
+		} else {
+			ret = mnl_cb_run(nlh, ret, seq, portid, cb, arg);
+			if (!ret) {
+				/*
+				 * libmnl returns 0 when a DONE or a
+				 * successful ACK message is found.
+				 */
+				break;
+			}
 			if (ret < 0) {
+				/*
+				 * An ACK message with an error was
+				 * found, or some other error occurred.
+				 */
 				err = errno;
-				if (err != ENOSPC)
-					break;
-			}
-			if (!err) {
-				ret = mnl_cb_run(nlh, ret, seq, portid,
-						 cb, arg);
-				if (ret < 0) {
-					err = errno;
-					break;
-				}
-			}
-			/* Will receive till end of multipart message */
-			if (!(nlh->nlmsg_flags & NLM_F_MULTI) ||
-			    nlh->nlmsg_type == NLMSG_DONE)
 				break;
+			}
+			/* We should continue receiving. */
 		}
+	}
 	if (!err)
 		return 0;
 	rte_errno = err;
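
For illustration (not part of the patch, function name hypothetical), a
typical caller after this rework: the msglen argument is gone, the length is
always taken from nlh->nlmsg_len, and NLM_F_ACK is raised inside the helper.

	static void
	example_delete_filter(struct mlx5_flow_tcf_context *tcf,
			      struct nlmsghdr *nlh)
	{
		/* One prepared command per message; no length argument. */
		nlh->nlmsg_type = RTM_DELTFILTER;
		nlh->nlmsg_flags = NLM_F_REQUEST;
		if (flow_tcf_nl_ack(tcf, nlh, NULL, NULL))
			DRV_LOG(WARNING, "netlink: failed to delete filter");
	}
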
@@ -3859,30 +3874,6 @@ flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size)
 	return nlh;
 }
 
-/**
- * Set NLM_F_ACK flags in the last netlink command in buffer.
- * Only last command in the buffer will be acked by system.
- *
- * @param[in, out] buf
- *   Pointer to buffer with netlink commands.
- */
-static void
-flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf)
-{
-	struct nlmsghdr *nlh;
-	uint32_t size = 0;
-
-	assert(buf->size);
-	do {
-		nlh = (struct nlmsghdr *)&buf->msg[size];
-		size += NLMSG_ALIGN(nlh->nlmsg_len);
-		if (size >= buf->size) {
-			nlh->nlmsg_flags |= NLM_F_ACK;
-			break;
-		}
-	} while (true);
-}
-
 /**
 * Send the buffers with prepared netlink commands. Scans the list and
 * sends all found buffers. Buffers are sent and freed anyway in order
@@ -3901,21 +3892,35 @@ static int
 flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
 		    struct tcf_nlcb_context *ctx)
 {
-	struct tcf_nlcb_buf *bc, *bn;
-	struct nlmsghdr *nlh;
+	struct tcf_nlcb_buf *bc = LIST_FIRST(&ctx->nlbuf);
 	int ret = 0;
 
-	bc = LIST_FIRST(&ctx->nlbuf);
 	while (bc) {
+		struct tcf_nlcb_buf *bn = LIST_NEXT(bc, next);
+		struct nlmsghdr *nlh;
+		uint32_t msg = 0;
 		int rc;
 
-		bn = LIST_NEXT(bc, next);
-		if (bc->size) {
-			flow_tcf_setack_nlcmd(bc);
-			nlh = (struct nlmsghdr *)&bc->msg;
-			rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL);
-			if (rc && !ret)
-				ret = rc;
+		while (msg < bc->size) {
+			/*
+			 * Send the Netlink commands from the buffer one by
+			 * one. If we sent multiple rule deletion commands in
+			 * one Netlink message and an error occurred, it could
+			 * cause multiple ACK error messages and break the
+			 * sequence numbering of the Netlink communication,
+			 * because only one ACK reply is expected.
+			 */
+			assert((bc->size - msg) >= sizeof(struct nlmsghdr));
+			nlh = (struct nlmsghdr *)&bc->msg[msg];
+			assert((bc->size - msg) >= nlh->nlmsg_len);
+			msg += nlh->nlmsg_len;
+			rc = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
+			if (rc) {
+				DRV_LOG(WARNING,
					"netlink: cleanup error %d", rc);
+				if (!ret)
+					ret = rc;
+			}
 		}
 		rte_free(bc);
 		bc = bn;
@@ -3948,6 +3953,7 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
 	struct nlattr *na_local = NULL;
 	struct nlattr *na_peer = NULL;
 	unsigned char family;
+	uint32_t size;
 
 	if (nlh->nlmsg_type != RTM_NEWADDR) {
 		rte_errno = EINVAL;
@@ -3975,11 +3981,11 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
 	if (!na_local || !na_peer)
 		return 1;
 	/* Local rule found with scope link, permanent and assigned peer. */
-	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
-					MNL_ALIGN(sizeof(struct ifaddrmsg)) +
-					(family == AF_INET6
-					? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
-					: 2 * SZ_NLATTR_TYPE_OF(uint32_t)));
+	size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
+	       MNL_ALIGN(sizeof(struct ifaddrmsg)) +
+	       (family == AF_INET6 ? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+				   : 2 * SZ_NLATTR_TYPE_OF(uint32_t));
+	cmd = flow_tcf_alloc_nlcmd(ctx, size);
 	if (!cmd) {
 		rte_errno = ENOMEM;
 		return -rte_errno;
@@ -4004,6 +4010,7 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
 		mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
 			     mnl_attr_get_payload(na_peer));
 	}
+	assert(size == cmd->nlmsg_len);
 	return 1;
 }
 
@@ -4040,7 +4047,7 @@ flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf,
 	ifa->ifa_family = AF_UNSPEC;
 	ifa->ifa_index = ifindex;
 	ifa->ifa_scope = RT_SCOPE_LINK;
-	ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx);
+	ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_local_cb, &ctx);
 	if (ret)
 		DRV_LOG(WARNING, "netlink: query device list error %d", ret);
 	ret = flow_tcf_send_nlcmd(tcf, &ctx);
@@ -4072,6 +4079,7 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
 	struct nlattr *na_ip = NULL;
 	struct nlattr *na_mac = NULL;
 	unsigned char family;
+	uint32_t size;
 
 	if (nlh->nlmsg_type != RTM_NEWNEIGH) {
 		rte_errno = EINVAL;
@@ -4098,12 +4106,12 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
 	if (!na_mac || !na_ip)
 		return 1;
 	/* Neigh rule with permenent attribute found. */
-	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
-					MNL_ALIGN(sizeof(struct ndmsg)) +
-					SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
-					(family == AF_INET6
-					?
SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) - : SZ_NLATTR_TYPE_OF(uint32_t))); + size = MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ndmsg)) + + SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) + + (family == AF_INET6 ? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) + : SZ_NLATTR_TYPE_OF(uint32_t)); + cmd = flow_tcf_alloc_nlcmd(ctx, size); if (!cmd) { rte_errno = ENOMEM; return -rte_errno; @@ -4126,6 +4134,7 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg) } mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN, mnl_attr_get_payload(na_mac)); + assert(size == cmd->nlmsg_len); return 1; } @@ -4159,7 +4168,7 @@ flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf, ndm->ndm_family = AF_UNSPEC; ndm->ndm_ifindex = ifindex; ndm->ndm_state = NUD_PERMANENT; - ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx); + ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_neigh_cb, &ctx); if (ret) DRV_LOG(WARNING, "netlink: query device list error %d", ret); ret = flow_tcf_send_nlcmd(tcf, &ctx); @@ -4192,6 +4201,7 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) struct nlattr *na_vxlan = NULL; bool found = false; unsigned int vxindex; + uint32_t size; if (nlh->nlmsg_type != RTM_NEWLINK) { rte_errno = EINVAL; @@ -4237,9 +4247,10 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) return 1; /* Attached VXLAN device found, store the command to delete. */ vxindex = ifm->ifi_index; - cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + - MNL_ALIGN(sizeof(struct ifinfomsg))); - if (!nlh) { + size = MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ifinfomsg)); + cmd = flow_tcf_alloc_nlcmd(ctx, size); + if (!cmd) { rte_errno = ENOMEM; return -rte_errno; } @@ -4249,6 +4260,7 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm)); ifm->ifi_family = AF_UNSPEC; ifm->ifi_index = vxindex; + assert(size == cmd->nlmsg_len); return 1; } @@ -4285,7 +4297,7 @@ flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf, nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm)); ifm->ifi_family = AF_UNSPEC; - ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_vxlan_cb, &ctx); + ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_vxlan_cb, &ctx); if (ret) DRV_LOG(WARNING, "netlink: query device list error %d", ret); ret = flow_tcf_send_nlcmd(tcf, &ctx); @@ -4357,7 +4369,7 @@ flow_tcf_rule_local(struct mlx5_flow_tcf_context *tcf, sizeof(encap->ipv6.dst), &encap->ipv6.dst); } - if (!flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL)) + if (!flow_tcf_nl_ack(tcf, nlh, NULL, NULL)) return 0; return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -4420,7 +4432,7 @@ flow_tcf_rule_neigh(struct mlx5_flow_tcf_context *tcf, if (encap->mask & FLOW_TCF_ENCAP_ETH_DST) mnl_attr_put(nlh, NDA_LLADDR, sizeof(encap->eth.dst), &encap->eth.dst); - if (!flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL)) + if (!flow_tcf_nl_ack(tcf, nlh, NULL, NULL)) return 0; return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -4695,7 +4707,7 @@ flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf, ifm->ifi_family = AF_UNSPEC; ifm->ifi_index = vtep->ifindex; assert(sizeof(buf) >= nlh->nlmsg_len); - ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL); if (ret) DRV_LOG(WARNING, "netlink: error deleting vxlan" " encap/decap ifindex %u", @@ -4785,7 +4797,7 @@ flow_tcf_vtep_create(struct 
mlx5_flow_tcf_context *tcf,
 	mnl_attr_nest_end(nlh, na_vxlan);
 	mnl_attr_nest_end(nlh, na_info);
 	assert(sizeof(buf) >= nlh->nlmsg_len);
-	ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL);
+	ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
 	if (ret) {
 		DRV_LOG(WARNING,
 			"netlink: VTEP %s create failure (%d)",
@@ -4827,7 +4839,7 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
 	ifm->ifi_index = vtep->ifindex;
 	ifm->ifi_flags = IFF_UP;
 	ifm->ifi_change = IFF_UP;
-	ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL);
+	ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL);
 	if (ret) {
 		rte_flow_error_set(error, -errno,
 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -5085,6 +5097,172 @@ flow_tcf_vtep_release(struct mlx5_flow_tcf_context *tcf,
 	pthread_mutex_unlock(&vtep_list_mutex);
 }
 
+struct tcf_nlcb_query {
+	uint32_t handle;
+	uint32_t tc_flags;
+	uint32_t flags_valid:1;
+};
+
+/**
+ * Collect queried rule attributes. This is a callback routine called by
+ * libmnl mnl_cb_run() in a loop for every message in a received packet.
+ * The current implementation collects only the flower flags.
+ *
+ * @param[in] nlh
+ *   Pointer to the reply header.
+ * @param[in, out] arg
+ *   Context pointer for this callback.
+ *
+ * @return
+ *   A positive, nonzero value on success (required by libmnl
+ *   to continue message processing).
+ */
+static int
+flow_tcf_collect_query_cb(const struct nlmsghdr *nlh, void *arg)
+{
+	struct tcf_nlcb_query *query = arg;
+	struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *na, *na_opt;
+	bool flower = false;
+
+	if (nlh->nlmsg_type != RTM_NEWTFILTER ||
+	    tcm->tcm_handle != query->handle)
+		return 1;
+	mnl_attr_for_each(na, nlh, sizeof(*tcm)) {
+		switch (mnl_attr_get_type(na)) {
+		case TCA_KIND:
+			if (strcmp(mnl_attr_get_payload(na), "flower")) {
+				/* Not a flower filter, drop entire message. */
+				return 1;
+			}
+			flower = true;
+			break;
+		case TCA_OPTIONS:
+			if (!flower) {
+				/* Not flower options, drop entire message. */
+				return 1;
+			}
+			/* Check nested flower options. */
+			mnl_attr_for_each_nested(na_opt, na) {
+				switch (mnl_attr_get_type(na_opt)) {
+				case TCA_FLOWER_FLAGS:
+					query->flags_valid = 1;
+					query->tc_flags =
+						mnl_attr_get_u32(na_opt);
+					break;
+				}
+			}
+			break;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Query the TC flower rule flags via netlink.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] dev_flow
+ *   Pointer to the flow.
+ * @param[out] pflags
+ *   Pointer to the data retrieved by the query.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+static int
+flow_tcf_query_flags(struct mlx5_flow_tcf_context *tcf,
+		     struct mlx5_flow *dev_flow,
+		     uint32_t *pflags)
+{
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+	struct tcf_nlcb_query query = {
+		.handle = dev_flow->tcf.tcm->tcm_handle,
+	};
+
+	nlh = mnl_nlmsg_put_header(tcf->buf);
+	nlh->nlmsg_type = RTM_GETTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	memcpy(tcm, dev_flow->tcf.tcm, sizeof(*tcm));
+	/*
+	 * Ignore the Netlink error for filter query operations.
+	 * The reply length is sent by the kernel as errno.
+	 * Just check that we got the flags option.
+	 */
+	flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_query_cb, &query);
+	if (!query.flags_valid) {
+		*pflags = 0;
+		return -ENOENT;
+	}
+	*pflags = query.tc_flags;
+	return 0;
+}
+
+/**
+ * Query and check the in_hw flag for the specified rule.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_context_create().
+ * @param[in] dev_flow
+ *   Pointer to the flow to check.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+static int
+flow_tcf_check_inhw(struct mlx5_flow_tcf_context *tcf,
+		    struct mlx5_flow *dev_flow)
+{
+	uint32_t flags;
+	int ret;
+
+	ret = flow_tcf_query_flags(tcf, dev_flow, &flags);
+	if (ret)
+		return ret;
+	return (flags & TCA_CLS_FLAGS_IN_HW) ? 0 : -ENOENT;
+}
+
+/**
+ * Remove flow from E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+
+	if (!flow)
+		return;
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	if (!dev_flow)
+		return;
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	if (dev_flow->tcf.applied) {
+		nlh = dev_flow->tcf.nlh;
+		nlh->nlmsg_type = RTM_DELTFILTER;
+		nlh->nlmsg_flags = NLM_F_REQUEST;
+		flow_tcf_nl_ack(ctx, nlh, NULL, NULL);
+		if (dev_flow->tcf.tunnel) {
+			assert(dev_flow->tcf.tunnel->vtep);
+			flow_tcf_vtep_release(ctx,
+				dev_flow->tcf.tunnel->vtep,
+				dev_flow);
+			dev_flow->tcf.tunnel->vtep = NULL;
+		}
+		dev_flow->tcf.applied = 0;
+	}
+}
 
 /**
 * Apply flow to E-Switch by sending Netlink message.
@@ -5136,54 +5314,36 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 		*dev_flow->tcf.tunnel->ifindex_ptr =
 			dev_flow->tcf.tunnel->vtep->ifindex;
 	}
-	if (!flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) {
+	if (!flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) {
 		dev_flow->tcf.applied = 1;
+		if (*dev_flow->tcf.ptc_flags & TCA_CLS_FLAGS_SKIP_SW)
+			return 0;
+		/*
+		 * The rule was applied without the skip_sw flag set.
+		 * We should check whether the rule was actually
+		 * accepted by the hardware (check the in_hw flag).
+		 */
+		if (flow_tcf_check_inhw(ctx, dev_flow)) {
+			flow_tcf_remove(dev, flow);
+			return rte_flow_error_set
+				(error, ENOENT,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				 "netlink: rule has no in_hw flag set");
+		}
 		return 0;
 	}
+	if (dev_flow->tcf.tunnel) {
+		/* Roll back the VTEP configuration if rule apply failed. */
+		assert(dev_flow->tcf.tunnel->vtep);
+		flow_tcf_vtep_release(ctx, dev_flow->tcf.tunnel->vtep,
+				      dev_flow);
+		dev_flow->tcf.tunnel->vtep = NULL;
+	}
 	return rte_flow_error_set(error, rte_errno,
 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 				  "netlink: failed to create TC flow rule");
 }
 
-/**
- * Remove flow from E-Switch by sending Netlink message.
- *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in, out] flow
- *   Pointer to the sub flow.
- */
-static void
-flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
-{
-	struct priv *priv = dev->data->dev_private;
-	struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
-	struct mlx5_flow *dev_flow;
-	struct nlmsghdr *nlh;
-
-	if (!flow)
-		return;
-	dev_flow = LIST_FIRST(&flow->dev_flows);
-	if (!dev_flow)
-		return;
-	/* E-Switch flow can't be expanded.
*/ - assert(!LIST_NEXT(dev_flow, next)); - if (dev_flow->tcf.applied) { - nlh = dev_flow->tcf.nlh; - nlh->nlmsg_type = RTM_DELTFILTER; - nlh->nlmsg_flags = NLM_F_REQUEST; - flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL); - if (dev_flow->tcf.tunnel) { - assert(dev_flow->tcf.tunnel->vtep); - flow_tcf_vtep_release(ctx, - dev_flow->tcf.tunnel->vtep, - dev_flow); - dev_flow->tcf.tunnel->vtep = NULL; - } - dev_flow->tcf.applied = 0; - } -} - /** * Remove flow from E-Switch and release resources of the device flow. * @@ -5503,7 +5663,7 @@ flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh, * Message received from Netlink. * @param[out] data * Pointer to data area to be filled by the parsing routine. - * assumed to be a pinter to struct flow_tcf_stats_basic. + * assumed to be a pointer to struct flow_tcf_stats_basic. * * @return * MNL_CB_OK value. @@ -5551,7 +5711,7 @@ flow_tcf_query_count(struct rte_eth_dev *dev, void *data, struct rte_flow_error *error) { - struct flow_tcf_stats_basic sb_data = { 0 }; + struct flow_tcf_stats_basic sb_data; struct rte_flow_query_count *qc = data; struct priv *priv = dev->data->dev_private; struct mlx5_flow_tcf_context *ctx = priv->tcf_context; @@ -5562,6 +5722,7 @@ flow_tcf_query_count(struct rte_eth_dev *dev, ssize_t ret; assert(qc); + memset(&sb_data, 0, sizeof(sb_data)); dev_flow = LIST_FIRST(&flow->dev_flows); /* E-Switch flow can't be expanded. */ assert(!LIST_NEXT(dev_flow, next)); @@ -5723,7 +5884,7 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, tcm->tcm_parent = TC_H_INGRESS; assert(sizeof(buf) >= nlh->nlmsg_len); /* Ignore errors when qdisc is already absent. */ - if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL) && + if (flow_tcf_nl_ack(ctx, nlh, NULL, NULL) && rte_errno != EINVAL && rte_errno != ENOENT) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -5740,7 +5901,7 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, tcm->tcm_parent = TC_H_INGRESS; mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress"); assert(sizeof(buf) >= nlh->nlmsg_len); - if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) + if (flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to create ingress"
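
For illustration (not part of the patch, wrapper name hypothetical), the
verification flow the new apply path implements, condensed: when a rule is
sent without TCA_CLS_FLAGS_SKIP_SW the kernel may accept it in software even
if hardware offload failed, so the flower flags are queried back and
TCA_CLS_FLAGS_IN_HW is required; otherwise the rule is torn down.

	static int
	example_apply_verified(struct rte_eth_dev *dev,
			       struct mlx5_flow_tcf_context *tcf,
			       struct rte_flow *flow,
			       struct mlx5_flow *dev_flow)
	{
		/* skip_sw rules fail at apply time if not offloaded. */
		if (*dev_flow->tcf.ptc_flags & TCA_CLS_FLAGS_SKIP_SW)
			return 0;
		/* Re-query TCA_FLOWER_FLAGS and test the in_hw flag. */
		if (flow_tcf_check_inhw(tcf, dev_flow)) {
			flow_tcf_remove(dev, flow);
			return -ENOENT;
		}
		return 0;
	}
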