X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Ftap%2Ftap_flow.c;h=41f734522c4bd41ab9c840330b8e9bc36d92f76c;hb=1ff8e79591a6cc99c475a50e87adb30d9b20569a;hp=7f1693d4046884e079ce7a0a26dd225ade6ef52e;hpb=2bc06869cd94195e986cfb7939a549d7050097e8;p=dpdk.git diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c index 7f1693d404..41f734522c 100644 --- a/drivers/net/tap/tap_flow.c +++ b/drivers/net/tap/tap_flow.c @@ -31,6 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include #include #include @@ -80,6 +82,8 @@ enum { }; #endif +#define ISOLATE_HANDLE 1 + struct rte_flow { LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */ struct rte_flow *remote_flow; /* associated remote flow */ @@ -96,6 +100,7 @@ struct convert_data { struct remote_rule { struct rte_flow_attr attr; struct rte_flow_item items[2]; + struct rte_flow_action actions[2]; int mirred; }; @@ -124,11 +129,17 @@ tap_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, struct rte_flow_error *error); +static int +tap_flow_isolate(struct rte_eth_dev *dev, + int set, + struct rte_flow_error *error); + static const struct rte_flow_ops tap_flow_ops = { .validate = tap_flow_validate, .create = tap_flow_create, .destroy = tap_flow_destroy, .flush = tap_flow_flush, + .isolate = tap_flow_isolate, }; /* Static initializer for items. */ @@ -256,6 +267,47 @@ static const struct tap_flow_items tap_flow_items[] = { }, }; +/* + * TC rules, by growing priority + * + * Remote netdevice Tap netdevice + * +-------------+-------------+ +-------------+-------------+ + * | Ingress | Egress | | Ingress | Egress | + * |-------------|-------------| |-------------|-------------| + * | | \ / | | | REMOTE TX | prio 1 + * | | \ / | | | \ / | prio 2 + * | EXPLICIT | \ / | | EXPLICIT | \ / | . + * | | \ / | | | \ / | . + * | RULES | X | | RULES | X | . + * | . | / \ | | . | / \ | . + * | . | / \ | | . | / \ | . + * | . | / \ | | . 
| / \ | . + * | . | / \ | | . | / \ | . + * + * .... .... .... .... + * + * | . | \ / | | . | \ / | . + * | . | \ / | | . | \ / | . + * | | \ / | | | \ / | + * | LOCAL_MAC | \ / | | \ / | \ / | last prio - 5 + * | PROMISC | X | | \ / | X | last prio - 4 + * | ALLMULTI | / \ | | X | / \ | last prio - 3 + * | BROADCAST | / \ | | / \ | / \ | last prio - 2 + * | BROADCASTV6 | / \ | | / \ | / \ | last prio - 1 + * | xx | / \ | | ISOLATE | / \ | last prio + * +-------------+-------------+ +-------------+-------------+ + * + * The implicit flow rules are stored in a list in which the last two entries + * are always the ISOLATE and REMOTE_TX rules, e.g.: + * + * LOCAL_MAC -> BROADCAST -> BROADCASTV6 -> REMOTE_TX -> ISOLATE -> NULL + * + * That enables tap_flow_isolate() to remove implicit rules by popping the list + * head and removing it as long as it applies to the remote netdevice. The + * implicit rule for TX redirection is not removed, as isolate concerns only + * incoming traffic. + */ + static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = { [TAP_REMOTE_LOCAL_MAC] = { .attr = { @@ -362,6 +414,19 @@ static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = { }, .mirred = TCA_EGRESS_MIRROR, }, + [TAP_ISOLATE] = { + .attr = { + .group = MAX_GROUP, + .priority = PRIORITY_MASK - TAP_ISOLATE, + .ingress = 1, + }, + .items[0] = { + .type = RTE_FLOW_ITEM_TYPE_VOID, + }, + .items[1] = { + .type = RTE_FLOW_ITEM_TYPE_END, + }, + }, }; /** @@ -399,9 +464,6 @@ tap_flow_create_eth(const struct rte_flow_item *item, void *data) if (!flow) return 0; msg = &flow->msg; - if (spec->type & mask->type) - msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, - (spec->type & mask->type)); if (!is_zero_ether_addr(&spec->dst)) { nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, ETHER_ADDR_LEN, &spec->dst.addr_bytes); @@ -506,8 +568,6 @@ tap_flow_create_ipv4(const struct rte_flow_item *item, void *data) msg = &flow->msg; if (!info->eth_type) info->eth_type = htons(ETH_P_IP); - if 
(!info->vlan) - msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_IP)); if (!spec) return 0; if (spec->hdr.dst_addr) { @@ -564,8 +624,6 @@ tap_flow_create_ipv6(const struct rte_flow_item *item, void *data) msg = &flow->msg; if (!info->eth_type) info->eth_type = htons(ETH_P_IPV6); - if (!info->vlan) - msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_IPV6)); if (!spec) return 0; if (memcmp(spec->hdr.dst_addr, empty_addr, 16)) { @@ -612,18 +670,20 @@ tap_flow_create_udp(const struct rte_flow_item *item, void *data) /* check that previous ip_proto is compatible with udp */ if (info->ip_proto && info->ip_proto != IPPROTO_UDP) return -1; + /* TC does not support UDP port masking. Only accept if exact match. */ + if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) || + (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff)) + return -1; if (!flow) return 0; msg = &flow->msg; nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP); if (!spec) return 0; - if (spec->hdr.dst_port && - (spec->hdr.dst_port & mask->hdr.dst_port) == spec->hdr.dst_port) + if (spec->hdr.dst_port & mask->hdr.dst_port) nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST, spec->hdr.dst_port); - if (spec->hdr.src_port && - (spec->hdr.src_port & mask->hdr.src_port) == spec->hdr.src_port) + if (spec->hdr.src_port & mask->hdr.src_port) nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC, spec->hdr.src_port); return 0; @@ -656,18 +716,20 @@ tap_flow_create_tcp(const struct rte_flow_item *item, void *data) /* check that previous ip_proto is compatible with tcp */ if (info->ip_proto && info->ip_proto != IPPROTO_TCP) return -1; + /* TC does not support TCP port masking. Only accept if exact match. 
*/ + if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) || + (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff)) + return -1; if (!flow) return 0; msg = &flow->msg; nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP); if (!spec) return 0; - if (spec->hdr.dst_port && - (spec->hdr.dst_port & mask->hdr.dst_port) == spec->hdr.dst_port) + if (spec->hdr.dst_port & mask->hdr.dst_port) nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST, spec->hdr.dst_port); - if (spec->hdr.src_port && - (spec->hdr.src_port & mask->hdr.src_port) == spec->hdr.src_port) + if (spec->hdr.src_port & mask->hdr.src_port) nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC, spec->hdr.src_port); return 0; @@ -972,16 +1034,13 @@ priv_flow_process(struct pmd_internals *pmd, if (err) goto exit_item_not_supported; if (flow && cur_item->convert) { - if (!pmd->flower_vlan_support && - cur_item->convert == tap_flow_create_vlan) - goto exit_item_not_supported; err = cur_item->convert(items, &data); if (err) goto exit_item_not_supported; } } if (flow) { - if (pmd->flower_vlan_support && data.vlan) { + if (data.vlan) { nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE, htons(ETH_P_8021Q)); nlattr_add16(&flow->msg.nh, @@ -1165,8 +1224,12 @@ tap_flow_create(struct rte_eth_dev *dev, } err = nl_recv_ack(pmd->nlsk_fd); if (err < 0) { + RTE_LOG(ERR, PMD, + "Kernel refused TC filter rule creation (%d): %s\n", + errno, strerror(errno)); rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "overlapping rules"); + NULL, + "overlapping rules or Kernel too old for flower support"); goto fail; } LIST_INSERT_HEAD(&pmd->flows, flow, next); @@ -1206,9 +1269,13 @@ tap_flow_create(struct rte_eth_dev *dev, } err = nl_recv_ack(pmd->nlsk_fd); if (err < 0) { + RTE_LOG(ERR, PMD, + "Kernel refused TC filter rule creation (%d): %s\n", + errno, strerror(errno)); rte_flow_error_set( error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "overlapping rules"); + NULL, + "overlapping rules or Kernel too old for flower 
support"); goto fail; } flow->remote_flow = remote_flow; @@ -1253,7 +1320,13 @@ tap_flow_destroy_pmd(struct pmd_internals *pmd, goto end; } ret = nl_recv_ack(pmd->nlsk_fd); + /* If errno is ENOENT, the rule is already no longer in the kernel. */ + if (ret < 0 && errno == ENOENT) + ret = 0; if (ret < 0) { + RTE_LOG(ERR, PMD, + "Kernel refused TC filter rule deletion (%d): %s\n", + errno, strerror(errno)); rte_flow_error_set( error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "couldn't receive kernel ack to our request"); @@ -1271,7 +1344,12 @@ tap_flow_destroy_pmd(struct pmd_internals *pmd, goto end; } ret = nl_recv_ack(pmd->nlsk_fd); + if (ret < 0 && errno == ENOENT) + ret = 0; if (ret < 0) { + RTE_LOG(ERR, PMD, + "Kernel refused TC filter rule deletion (%d): %s\n", + errno, strerror(errno)); rte_flow_error_set( error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "Failure trying to receive nl ack"); @@ -1301,6 +1379,78 @@ tap_flow_destroy(struct rte_eth_dev *dev, return tap_flow_destroy_pmd(pmd, flow, error); } +/** + * Enable/disable flow isolation. + * + * @see rte_flow_isolate() + * @see rte_flow_ops + */ +static int +tap_flow_isolate(struct rte_eth_dev *dev, + int set, + struct rte_flow_error *error __rte_unused) +{ + struct pmd_internals *pmd = dev->data->dev_private; + + if (set) + pmd->flow_isolate = 1; + else + pmd->flow_isolate = 0; + /* + * If netdevice is there, setup appropriate flow rules immediately. + * Otherwise it will be set when bringing up the netdevice (tun_alloc). + */ + if (!pmd->rxq[0].fd) + return 0; + if (set) { + struct rte_flow *flow; + + while (1) { + flow = LIST_FIRST(&pmd->implicit_flows); + if (!flow) + break; + /* + * Remove all implicit rules on the remote. + * Keep the local rule to redirect packets on TX. + * Keep also the last implicit local rule: ISOLATE. 
+ */ + if (flow->msg.t.tcm_ifindex == pmd->if_index) + break; + if (tap_flow_destroy_pmd(pmd, flow, NULL) < 0) + goto error; + } + /* Switch the TC rule according to pmd->flow_isolate */ + if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1) + goto error; + } else { + /* Switch the TC rule according to pmd->flow_isolate */ + if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1) + goto error; + if (!pmd->remote_if_index) + return 0; + if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0) + goto error; + if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0) + goto error; + if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0) + goto error; + if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0) + goto error; + if (dev->data->promiscuous && + tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0) + goto error; + if (dev->data->all_multicast && + tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0) + goto error; + } + return 0; +error: + pmd->flow_isolate = 0; + return -rte_flow_error_set( + error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "TC rule creation failed"); +} + /** * Destroy all flows. 
* @@ -1335,6 +1485,13 @@ tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) int tap_flow_implicit_create(struct pmd_internals *pmd, enum implicit_rule_index idx) { + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct rte_flow_action *actions = implicit_rte_flows[idx].actions; + struct rte_flow_action isolate_actions[2] = { + [1] = { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; struct rte_flow_item *items = implicit_rte_flows[idx].items; struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr; struct rte_flow_item_eth eth_local = { .type = 0 }; @@ -1355,12 +1512,20 @@ int tap_flow_implicit_create(struct pmd_internals *pmd, remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); if (!remote_flow) { - RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow"); + RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow\n"); goto fail; } msg = &remote_flow->msg; if (idx == TAP_REMOTE_TX) { if_index = pmd->if_index; + } else if (idx == TAP_ISOLATE) { + if_index = pmd->if_index; + /* Don't be exclusive for this rule, it can be changed later. */ + flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + isolate_actions[0].type = pmd->flow_isolate ? 
+ RTE_FLOW_ACTION_TYPE_DROP : + RTE_FLOW_ACTION_TYPE_PASSTHRU; + actions = isolate_actions; } else if (idx == TAP_REMOTE_LOCAL_MAC) { /* * eth addr couldn't be set in implicit_rte_flows[] as it is not @@ -1369,24 +1534,32 @@ int tap_flow_implicit_create(struct pmd_internals *pmd, memcpy(&eth_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr)); items = items_local; } - tc_init_msg(msg, if_index, RTM_NEWTFILTER, - NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE); + tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags); msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL)); - tap_flow_set_handle(remote_flow); - if (priv_flow_process(pmd, attr, items, NULL, NULL, + /* + * The ISOLATE rule is always present and must have a static handle, as + * the action is changed whether the feature is enabled (DROP) or + * disabled (PASSTHRU). + */ + if (idx == TAP_ISOLATE) + remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE; + else + tap_flow_set_handle(remote_flow); + if (priv_flow_process(pmd, attr, items, actions, NULL, remote_flow, implicit_rte_flows[idx].mirred)) { RTE_LOG(ERR, PMD, "rte flow rule validation failed\n"); goto fail; } err = nl_send(pmd->nlsk_fd, &msg->nh); if (err < 0) { - RTE_LOG(ERR, PMD, "Failure sending nl request"); + RTE_LOG(ERR, PMD, "Failure sending nl request\n"); goto fail; } err = nl_recv_ack(pmd->nlsk_fd); if (err < 0) { RTE_LOG(ERR, PMD, - "Kernel refused TC filter rule creation"); + "Kernel refused TC filter rule creation (%d): %s\n", + errno, strerror(errno)); goto fail; } LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next); @@ -1464,10 +1637,6 @@ tap_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - struct pmd_internals *pmd = dev->data->dev_private; - - if (!pmd->flower_support) - return -ENOTSUP; switch (filter_type) { case RTE_ETH_FILTER_GENERIC: if (filter_op != RTE_ETH_FILTER_GET) @@ -1475,7 +1644,7 @@ tap_dev_filter_ctrl(struct rte_eth_dev *dev, *(const void **)arg = &tap_flow_ops; return 0; default: - 
RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported", + RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported\n", (void *)dev, filter_type); } return -EINVAL;