-/*-
- * BSD LICENSE
- *
- * Copyright 2017 6WIND S.A.
- * Copyright 2017 Mellanox
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
/**
#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_eth_ctrl.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
/* PMD headers. */
#include "mlx4.h"
+#include "mlx4_glue.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
uint32_t refcnt; /**< Reference count. */
};
+/**
+ * Translate RSS hash field types between their DPDK and Verbs encodings.
+ *
+ * A @p types value of 0 is special and stands for the supported (default)
+ * set stored in @p priv->hw_rss_sup.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param types
+ *   Hash types to translate, expressed in DPDK format (see struct
+ *   rte_eth_rss_conf) when @p verbs_to_dpdk is zero, in Verbs format
+ *   otherwise.
+ * @param verbs_to_dpdk
+ *   Zero to translate @p types from DPDK to Verbs, nonzero for the
+ *   opposite direction.
+ *
+ * @return
+ *   Translated RSS hash fields on success, (uint64_t)-1 otherwise and
+ *   rte_errno is set.
+ */
+uint64_t
+mlx4_conv_rss_types(struct priv *priv, uint64_t types, int verbs_to_dpdk)
+{
+	/* Row identifiers shared by both lookup tables below. */
+	enum {
+		INNER,
+		IPV4, IPV4_1, IPV4_2, IPV6, IPV6_1, IPV6_2, IPV6_3,
+		TCP, UDP,
+		IPV4_TCP, IPV4_UDP, IPV6_TCP, IPV6_TCP_1, IPV6_UDP, IPV6_UDP_1,
+	};
+	/* Source/destination pairs as hashed by Verbs. */
+	enum {
+		VERBS_IPV4 = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
+		VERBS_IPV6 = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
+		VERBS_TCP = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
+		VERBS_UDP = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
+	};
+	/* DPDK side of the mapping; a zero entry has no DPDK counterpart. */
+	static const uint64_t dpdk[] = {
+		[INNER] = 0,
+		[IPV4] = ETH_RSS_IPV4,
+		[IPV4_1] = ETH_RSS_FRAG_IPV4,
+		[IPV4_2] = ETH_RSS_NONFRAG_IPV4_OTHER,
+		[IPV6] = ETH_RSS_IPV6,
+		[IPV6_1] = ETH_RSS_FRAG_IPV6,
+		[IPV6_2] = ETH_RSS_NONFRAG_IPV6_OTHER,
+		[IPV6_3] = ETH_RSS_IPV6_EX,
+		[TCP] = 0,
+		[UDP] = 0,
+		[IPV4_TCP] = ETH_RSS_NONFRAG_IPV4_TCP,
+		[IPV4_UDP] = ETH_RSS_NONFRAG_IPV4_UDP,
+		[IPV6_TCP] = ETH_RSS_NONFRAG_IPV6_TCP,
+		[IPV6_TCP_1] = ETH_RSS_IPV6_TCP_EX,
+		[IPV6_UDP] = ETH_RSS_NONFRAG_IPV6_UDP,
+		[IPV6_UDP_1] = ETH_RSS_IPV6_UDP_EX,
+	};
+	/* Verbs side of the mapping, indexed like dpdk[]. */
+	static const uint64_t verbs[RTE_DIM(dpdk)] = {
+		[INNER] = IBV_RX_HASH_INNER,
+		[IPV4] = VERBS_IPV4,
+		[IPV4_1] = VERBS_IPV4,
+		[IPV4_2] = VERBS_IPV4,
+		[IPV6] = VERBS_IPV6,
+		[IPV6_1] = VERBS_IPV6,
+		[IPV6_2] = VERBS_IPV6,
+		[IPV6_3] = VERBS_IPV6,
+		[TCP] = VERBS_TCP,
+		[UDP] = VERBS_UDP,
+		[IPV4_TCP] = VERBS_IPV4 | VERBS_TCP,
+		[IPV4_UDP] = VERBS_IPV4 | VERBS_UDP,
+		[IPV6_TCP] = VERBS_IPV6 | VERBS_TCP,
+		[IPV6_TCP_1] = VERBS_IPV6 | VERBS_TCP,
+		[IPV6_UDP] = VERBS_IPV6 | VERBS_UDP,
+		[IPV6_UDP_1] = VERBS_IPV6 | VERBS_UDP,
+	};
+	const uint64_t *src;
+	const uint64_t *dst;
+	uint64_t matched = 0;
+	uint64_t result = 0;
+	unsigned int idx;
+
+	/* Pick the lookup direction. */
+	if (verbs_to_dpdk) {
+		src = verbs;
+		dst = dpdk;
+	} else {
+		src = dpdk;
+		dst = verbs;
+	}
+	if (types == 0) {
+		/* DPDK to Verbs: hand back the supported set as is. */
+		if (!verbs_to_dpdk)
+			return priv->hw_rss_sup;
+		types = priv->hw_rss_sup;
+	}
+	for (idx = 0; idx != RTE_DIM(dpdk); ++idx) {
+		const uint64_t bits = src[idx];
+
+		/* Skip empty rows and rows not entirely present in types. */
+		if (!bits || (types & bits) != bits)
+			continue;
+		matched |= bits;
+		result |= dst[idx];
+	}
+	/*
+	 * Fail when some requested bits were not covered by any row, or
+	 * when a DPDK to Verbs result exceeds what priv->hw_rss_sup holds.
+	 */
+	if ((types & ~matched) ||
+	    (!verbs_to_dpdk && (result & priv->hw_rss_sup) != result)) {
+		rte_errno = ENOTSUP;
+		return (uint64_t)-1;
+	}
+	return result;
+}
+
/**
* Merge Ethernet pattern item into flow rule handle.
*
* Additional mlx4-specific constraints on supported fields:
*
- * - No support for partial masks.
+ * - No support for partial masks, except in the specific case of matching
+ * all multicast traffic (@p spec->dst and @p mask->dst equal to
+ * 01:00:00:00:00:00).
* - Not providing @p item->spec or providing an empty @p mask->dst is
* *only* supported if the rule doesn't specify additional matching
* criteria (i.e. rule is promiscuous-like).
const char *msg;
unsigned int i;
- if (!mask) {
- flow->promisc = 1;
- } else {
+ if (mask) {
uint32_t sum_dst = 0;
uint32_t sum_src = 0;
goto error;
} else if (!sum_dst) {
flow->promisc = 1;
+ } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
+ if (!(spec->dst.addr_bytes[0] & 1)) {
+ msg = "mlx4 does not support the explicit"
+ " exclusion of all multicast traffic";
+ goto error;
+ }
+ flow->allmulti = 1;
} else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
msg = "mlx4 does not support matching partial"
" Ethernet fields";
flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
return 0;
}
+ if (flow->allmulti) {
+ flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
+ return 0;
+ }
++flow->ibv_attr->num_of_specs;
eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*eth = (struct ibv_flow_spec_eth) {
.type = IBV_FLOW_SPEC_ETH,
.size = sizeof(*eth),
};
+ if (!mask) {
+ flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+ return 0;
+ }
memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
/* Remove unwanted bits from values. */
eth->val.vlan_tag = spec->tci;
eth->mask.vlan_tag = mask->tci;
eth->val.vlan_tag &= eth->mask.vlan_tag;
+ if (flow->ibv_attr->type == IBV_FLOW_ATTR_ALL_DEFAULT)
+ flow->ibv_attr->type = IBV_FLOW_ATTR_NORMAL;
return 0;
error:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
* Additional mlx4-specific constraints on supported fields:
*
* - No support for partial masks.
+ * - Due to HW/FW limitation, flow rule priority is not taken into account
+ * when matching UDP destination ports, doing so is therefore only supported
+ * at the highest priority level (0).
*
* @param[in, out] flow
* Flow rule handle to update.
struct ibv_flow_spec_tcp_udp *udp;
const char *msg;
- if (!mask ||
+ if (mask &&
((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
(uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
msg = "mlx4 does not support matching partial UDP fields";
goto error;
}
+ if (mask && mask->hdr.dst_port && flow->priority) {
+ msg = "combining UDP destination port matching with a nonzero"
+ " priority level is not supported";
+ goto error;
+ }
if (!flow->ibv_attr)
return 0;
++flow->ibv_attr->num_of_specs;
struct ibv_flow_spec_tcp_udp *tcp;
const char *msg;
- if (!mask ||
+ if (mask &&
((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
(uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
msg = "mlx4 does not support matching partial TCP fields";
struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
struct rte_flow *flow = &temp;
const char *msg = NULL;
+ int overlap;
if (attr->group)
return rte_flow_error_set
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
NULL, "egress is not supported");
+ if (attr->transfer)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+ NULL, "transfer is not supported");
if (!attr->ingress)
return rte_flow_error_set
(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
NULL, "only ingress is supported");
fill:
+ overlap = 0;
proc = mlx4_flow_proc_item_list;
+ flow->priority = attr->priority;
/* Go over pattern. */
for (item = pattern; item->type; ++item) {
const struct mlx4_flow_proc_item *next = NULL;
flow->internal = 1;
continue;
}
- if (flow->promisc) {
+ if (flow->promisc || flow->allmulti) {
msg = "mlx4 does not support additional matching"
" criteria combined with indiscriminate"
" matching on Ethernet headers";
}
/* Go over actions list. */
for (action = actions; action->type; ++action) {
+ /* This one may appear anywhere multiple times. */
+ if (action->type == RTE_FLOW_ACTION_TYPE_VOID)
+ continue;
+ /* Fate-deciding actions may appear exactly once. */
+ if (overlap) {
+ msg = "cannot combine several fate-deciding actions,"
+ " choose between DROP, QUEUE or RSS";
+ goto exit_action_not_supported;
+ }
+ overlap = 1;
switch (action->type) {
const struct rte_flow_action_queue *queue;
+ const struct rte_flow_action_rss *rss;
+ const uint8_t *rss_key;
+ uint32_t rss_key_len;
+ uint64_t fields;
+ unsigned int i;
- case RTE_FLOW_ACTION_TYPE_VOID:
- continue;
case RTE_FLOW_ACTION_TYPE_DROP:
flow->drop = 1;
break;
case RTE_FLOW_ACTION_TYPE_QUEUE:
+ if (flow->rss)
+ break;
queue = action->conf;
- if (queue->index >= priv->dev->data->nb_rx_queues)
+ if (queue->index >= priv->dev->data->nb_rx_queues) {
+ msg = "queue target index beyond number of"
+ " configured Rx queues";
+ goto exit_action_not_supported;
+ }
+ flow->rss = mlx4_rss_get
+ (priv, 0, mlx4_rss_hash_key_default, 1,
+ &queue->index);
+ if (!flow->rss) {
+ msg = "not enough resources for additional"
+ " single-queue RSS context";
+ goto exit_action_not_supported;
+ }
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ if (flow->rss)
+ break;
+ rss = action->conf;
+ /* Default RSS configuration if none is provided. */
+ if (rss->key_len) {
+ rss_key = rss->key;
+ rss_key_len = rss->key_len;
+ } else {
+ rss_key = mlx4_rss_hash_key_default;
+ rss_key_len = MLX4_RSS_HASH_KEY_SIZE;
+ }
+ /* Sanity checks. */
+ for (i = 0; i < rss->queue_num; ++i)
+ if (rss->queue[i] >=
+ priv->dev->data->nb_rx_queues)
+ break;
+ if (i != rss->queue_num) {
+ msg = "queue index target beyond number of"
+ " configured Rx queues";
+ goto exit_action_not_supported;
+ }
+ if (!rte_is_power_of_2(rss->queue_num)) {
+ msg = "for RSS, mlx4 requires the number of"
+ " queues to be a power of two";
+ goto exit_action_not_supported;
+ }
+ if (rss_key_len != sizeof(flow->rss->key)) {
+ msg = "mlx4 supports exactly one RSS hash key"
+ " length: "
+ MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
+ goto exit_action_not_supported;
+ }
+ for (i = 1; i < rss->queue_num; ++i)
+ if (rss->queue[i] - rss->queue[i - 1] != 1)
+ break;
+ if (i != rss->queue_num) {
+ msg = "mlx4 requires RSS contexts to use"
+ " consecutive queue indices only";
+ goto exit_action_not_supported;
+ }
+ if (rss->queue[0] % rss->queue_num) {
+ msg = "mlx4 requires the first queue of a RSS"
+ " context to be aligned on a multiple"
+ " of the context size";
+ goto exit_action_not_supported;
+ }
+ if (rss->func &&
+ rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
+ msg = "the only supported RSS hash function"
+ " is Toeplitz";
+ goto exit_action_not_supported;
+ }
+ if (rss->level) {
+ msg = "a nonzero RSS encapsulation level is"
+ " not supported";
goto exit_action_not_supported;
- flow->queue = 1;
- flow->queue_id = queue->index;
+ }
+ rte_errno = 0;
+ fields = mlx4_conv_rss_types(priv, rss->types, 0);
+ if (fields == (uint64_t)-1 && rte_errno) {
+ msg = "unsupported RSS hash type requested";
+ goto exit_action_not_supported;
+ }
+ flow->rss = mlx4_rss_get
+ (priv, fields, rss_key, rss->queue_num,
+ rss->queue);
+ if (!flow->rss) {
+ msg = "either invalid parameters or not enough"
+ " resources for additional multi-queue"
+ " RSS context";
+ goto exit_action_not_supported;
+ }
break;
default:
goto exit_action_not_supported;
}
}
- if (!flow->queue && !flow->drop)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, "no valid action");
+ /* When fate is unknown, drop traffic. */
+ if (!overlap)
+ flow->drop = 1;
/* Validation ends here. */
- if (!addr)
+ if (!addr) {
+ if (flow->rss)
+ mlx4_rss_put(flow->rss);
return 0;
+ }
if (flow == &temp) {
/* Allocate proper handle based on collected data. */
const struct mlx4_malloc_vec vec[] = {
},
};
- if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
+ if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
+ if (temp.rss)
+ mlx4_rss_put(temp.rss);
return rte_flow_error_set
(error, -rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"flow rule handle allocation failure");
+ }
/* Most fields will be updated by second pass. */
*flow = (struct rte_flow){
.ibv_attr = temp.ibv_attr,
.ibv_attr_size = sizeof(*flow->ibv_attr),
+ .rss = temp.rss,
};
*flow->ibv_attr = (struct ibv_flow_attr){
.type = IBV_FLOW_ATTR_NORMAL,
item, msg ? msg : "item not supported");
exit_action_not_supported:
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
- action, "action not supported");
+ action, msg ? msg : "action not supported");
}
/**
.priv = priv,
.refcnt = 1,
};
- drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
+ drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
if (!drop->cq)
goto error;
- drop->qp = ibv_create_qp(priv->pd,
- &(struct ibv_qp_init_attr){
- .send_cq = drop->cq,
- .recv_cq = drop->cq,
- .qp_type = IBV_QPT_RAW_PACKET,
- });
+ drop->qp = mlx4_glue->create_qp
+ (priv->pd,
+ &(struct ibv_qp_init_attr){
+ .send_cq = drop->cq,
+ .recv_cq = drop->cq,
+ .qp_type = IBV_QPT_RAW_PACKET,
+ });
if (!drop->qp)
goto error;
priv->drop = drop;
return drop;
error:
if (drop->qp)
- claim_zero(ibv_destroy_qp(drop->qp));
+ claim_zero(mlx4_glue->destroy_qp(drop->qp));
if (drop->cq)
- claim_zero(ibv_destroy_cq(drop->cq));
+ claim_zero(mlx4_glue->destroy_cq(drop->cq));
if (drop)
rte_free(drop);
rte_errno = ENOMEM;
if (--drop->refcnt)
return;
drop->priv->drop = NULL;
- claim_zero(ibv_destroy_qp(drop->qp));
- claim_zero(ibv_destroy_cq(drop->cq));
+ claim_zero(mlx4_glue->destroy_qp(drop->qp));
+ claim_zero(mlx4_glue->destroy_cq(drop->cq));
rte_free(drop);
}
if (!enable) {
if (!flow->ibv_flow)
return 0;
- claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
flow->ibv_flow = NULL;
if (flow->drop)
mlx4_drop_put(priv->drop);
+ else if (flow->rss)
+ mlx4_rss_detach(flow->rss);
return 0;
}
assert(flow->ibv_attr);
!priv->isolated &&
flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
if (flow->ibv_flow) {
- claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
flow->ibv_flow = NULL;
if (flow->drop)
mlx4_drop_put(priv->drop);
+ else if (flow->rss)
+ mlx4_rss_detach(flow->rss);
}
err = EACCES;
msg = ("priority level "
" is reserved when not in isolated mode");
goto error;
}
- if (flow->queue) {
- struct rxq *rxq = NULL;
+ if (flow->rss) {
+ struct mlx4_rss *rss = flow->rss;
+ int missing = 0;
+ unsigned int i;
- if (flow->queue_id < priv->dev->data->nb_rx_queues)
- rxq = priv->dev->data->rx_queues[flow->queue_id];
+ /* Stop at the first nonexistent target queue. */
+ for (i = 0; i != rss->queues; ++i)
+ if (rss->queue_id[i] >=
+ priv->dev->data->nb_rx_queues ||
+ !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+ missing = 1;
+ break;
+ }
if (flow->ibv_flow) {
- if (!rxq ^ !flow->drop)
+ if (missing ^ !flow->drop)
return 0;
/* Verbs flow needs updating. */
- claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
flow->ibv_flow = NULL;
if (flow->drop)
mlx4_drop_put(priv->drop);
+ else
+ mlx4_rss_detach(rss);
+ }
+ if (!missing) {
+ err = mlx4_rss_attach(rss);
+ if (err) {
+ err = -err;
+ msg = "cannot create indirection table or hash"
+ " QP to associate flow rule with";
+ goto error;
+ }
+ qp = rss->qp;
}
- if (rxq)
- qp = rxq->qp;
/* A missing target queue drops traffic implicitly. */
- flow->drop = !rxq;
+ flow->drop = missing;
}
if (flow->drop) {
+ if (flow->ibv_flow)
+ return 0;
mlx4_drop_get(priv);
if (!priv->drop) {
err = rte_errno;
assert(qp);
if (flow->ibv_flow)
return 0;
- flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+ flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
if (flow->ibv_flow)
return 0;
if (flow->drop)
mlx4_drop_put(priv->drop);
+ else if (flow->rss)
+ mlx4_rss_detach(flow->rss);
err = errno;
msg = "flow rule rejected by device";
error:
}
return flow;
}
+ if (flow->rss)
+ mlx4_rss_put(flow->rss);
rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
error->message);
rte_free(flow);
if (err)
return err;
LIST_REMOVE(flow, next);
+ if (flow->rss)
+ mlx4_rss_put(flow->rss);
rte_free(flow);
return 0;
}
return 0;
}
+/**
+ * Look up the next VLAN filter configured on the device.
+ *
+ * The search scans the VLAN filter bitmap of @p priv->dev upward, starting
+ * at @p vlan itself (inclusive).
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param vlan
+ *   VLAN ID the search starts from.
+ *
+ * @return
+ *   Next configured VLAN ID or a high value (>= 4096) if there is none.
+ */
+static uint16_t
+mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
+{
+	for (; vlan < 4096; ++vlan) {
+		/* Each bitmap word covers 64 consecutive VLAN IDs. */
+		const unsigned int word = vlan / 64;
+		const unsigned int bit = vlan % 64;
+
+		if (priv->dev->data->vlan_filter_conf.ids[word] &
+		    (UINT64_C(1) << bit))
+			break;
+	}
+	return vlan;
+}
+
/**
* Generate internal flow rules.
*
+ * Various flow rules are created depending on the mode the device is in:
+ *
+ * 1. Promiscuous:
+ * port MAC + broadcast + catch-all (VLAN filtering is ignored).
+ * 2. All multicast:
+ * port MAC/VLAN + broadcast + catch-all multicast.
+ * 3. Otherwise:
+ * port MAC/VLAN + broadcast MAC/VLAN.
+ *
+ * About MAC flow rules:
+ *
+ * - MAC flow rules are generated from @p dev->data->mac_addrs
+ * (@p priv->mac array).
+ * - An additional flow rule for Ethernet broadcasts is also generated.
+ * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
+ * is enabled and VLAN filters are configured.
+ *
* @param priv
* Pointer to private structure.
* @param[out] error
.priority = MLX4_FLOW_PRIORITY_LAST,
.ingress = 1,
};
+ struct rte_flow_item_eth eth_spec;
+ const struct rte_flow_item_eth eth_mask = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ };
+ const struct rte_flow_item_eth eth_allmulti = {
+ .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_vlan vlan_spec;
+ const struct rte_flow_item_vlan vlan_mask = {
+ .tci = RTE_BE16(0x0fff),
+ };
struct rte_flow_item pattern[] = {
{
.type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
},
{
.type = RTE_FLOW_ITEM_TYPE_ETH,
- .spec = &(struct rte_flow_item_eth){
- .dst = priv->mac,
- },
- .mask = &(struct rte_flow_item_eth){
- .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
- },
+ .spec = &eth_spec,
+ .mask = &eth_mask,
+ },
+ {
+ /* Replaced with VLAN if filtering is enabled. */
+ .type = RTE_FLOW_ITEM_TYPE_END,
},
{
.type = RTE_FLOW_ITEM_TYPE_END,
},
};
+ /*
+ * Round number of queues down to their previous power of 2 to
+ * comply with RSS context limitations. Extra queues silently do not
+ * get RSS by default.
+ */
+ uint32_t queues =
+ rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
+ uint16_t queue[queues];
+ struct rte_flow_action_rss action_rss = {
+ .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+ .level = 0,
+ .types = 0,
+ .key_len = MLX4_RSS_HASH_KEY_SIZE,
+ .queue_num = queues,
+ .key = mlx4_rss_hash_key_default,
+ .queue = queue,
+ };
struct rte_flow_action actions[] = {
{
- .type = RTE_FLOW_ACTION_TYPE_QUEUE,
- .conf = &(struct rte_flow_action_queue){
- .index = 0,
- },
+ .type = RTE_FLOW_ACTION_TYPE_RSS,
+ .conf = &action_rss,
},
{
.type = RTE_FLOW_ACTION_TYPE_END,
},
};
+ struct ether_addr *rule_mac = &eth_spec.dst;
+ rte_be16_t *rule_vlan =
+ (priv->dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_FILTER) &&
+ !priv->dev->data->promiscuous ?
+ &vlan_spec.tci :
+ NULL;
+ uint16_t vlan = 0;
+ struct rte_flow *flow;
+ unsigned int i;
+ int err = 0;
- if (!mlx4_flow_create(priv->dev, &attr, pattern, actions, error))
- return -rte_errno;
- return 0;
+ /* Nothing to be done if there are no Rx queues. */
+ if (!queues)
+ goto error;
+ /* Prepare default RSS configuration. */
+ for (i = 0; i != queues; ++i)
+ queue[i] = i;
+ /*
+ * Set up VLAN item if filtering is enabled and at least one VLAN
+ * filter is configured.
+ */
+ if (rule_vlan) {
+ vlan = mlx4_flow_internal_next_vlan(priv, 0);
+ if (vlan < 4096) {
+ pattern[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_VLAN,
+ .spec = &vlan_spec,
+ .mask = &vlan_mask,
+ };
+next_vlan:
+ *rule_vlan = rte_cpu_to_be_16(vlan);
+ } else {
+ rule_vlan = NULL;
+ }
+ }
+ for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
+ const struct ether_addr *mac;
+
+ /* Broadcasts are handled by an extra iteration. */
+ if (i < RTE_DIM(priv->mac))
+ mac = &priv->mac[i];
+ else
+ mac = &eth_mask.dst;
+ if (is_zero_ether_addr(mac))
+ continue;
+ /* Check if MAC flow rule is already present. */
+ for (flow = LIST_FIRST(&priv->flows);
+ flow && flow->internal;
+ flow = LIST_NEXT(flow, next)) {
+ const struct ibv_flow_spec_eth *eth =
+ (const void *)((uintptr_t)flow->ibv_attr +
+ sizeof(*flow->ibv_attr));
+ unsigned int j;
+
+ if (!flow->mac)
+ continue;
+ assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
+ assert(flow->ibv_attr->num_of_specs == 1);
+ assert(eth->type == IBV_FLOW_SPEC_ETH);
+ assert(flow->rss);
+ if (rule_vlan &&
+ (eth->val.vlan_tag != *rule_vlan ||
+ eth->mask.vlan_tag != RTE_BE16(0x0fff)))
+ continue;
+ if (!rule_vlan && eth->mask.vlan_tag)
+ continue;
+ for (j = 0; j != sizeof(mac->addr_bytes); ++j)
+ if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
+ eth->mask.dst_mac[j] != UINT8_C(0xff) ||
+ eth->val.src_mac[j] != UINT8_C(0x00) ||
+ eth->mask.src_mac[j] != UINT8_C(0x00))
+ break;
+ if (j != sizeof(mac->addr_bytes))
+ continue;
+ if (flow->rss->queues != queues ||
+ memcmp(flow->rss->queue_id, action_rss.queue,
+ queues * sizeof(flow->rss->queue_id[0])))
+ continue;
+ break;
+ }
+ if (!flow || !flow->internal) {
+ /* Not found, create a new flow rule. */
+ memcpy(rule_mac, mac, sizeof(*mac));
+ flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ actions, error);
+ if (!flow) {
+ err = -rte_errno;
+ goto error;
+ }
+ }
+ flow->select = 1;
+ flow->mac = 1;
+ }
+ if (rule_vlan) {
+ vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
+ if (vlan < 4096)
+ goto next_vlan;
+ }
+ /* Take care of promiscuous and all multicast flow rules. */
+ if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
+ for (flow = LIST_FIRST(&priv->flows);
+ flow && flow->internal;
+ flow = LIST_NEXT(flow, next)) {
+ if (priv->dev->data->promiscuous) {
+ if (flow->promisc)
+ break;
+ } else {
+ assert(priv->dev->data->all_multicast);
+ if (flow->allmulti)
+ break;
+ }
+ }
+ if (flow && flow->internal) {
+ assert(flow->rss);
+ if (flow->rss->queues != queues ||
+ memcmp(flow->rss->queue_id, action_rss.queue,
+ queues * sizeof(flow->rss->queue_id[0])))
+ flow = NULL;
+ }
+ if (!flow || !flow->internal) {
+ /* Not found, create a new flow rule. */
+ if (priv->dev->data->promiscuous) {
+ pattern[1].spec = NULL;
+ pattern[1].mask = NULL;
+ } else {
+ assert(priv->dev->data->all_multicast);
+ pattern[1].spec = &eth_allmulti;
+ pattern[1].mask = &eth_allmulti;
+ }
+ pattern[2] = pattern[3];
+ flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ actions, error);
+ if (!flow) {
+ err = -rte_errno;
+ goto error;
+ }
+ }
+ assert(flow->promisc || flow->allmulti);
+ flow->select = 1;
+ }
+error:
+ /* Clear selection and clean up stale internal flow rules. */
+ flow = LIST_FIRST(&priv->flows);
+ while (flow && flow->internal) {
+ struct rte_flow *next = LIST_NEXT(flow, next);
+
+ if (!flow->select)
+ claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ else
+ flow->select = 0;
+ flow = next;
+ }
+ return err;
}
/**
flow && flow->internal;
flow = LIST_FIRST(&priv->flows))
claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
- } else if (!LIST_FIRST(&priv->flows) ||
- !LIST_FIRST(&priv->flows)->internal) {
- /*
- * If the first rule is not internal outside isolated mode,
- * they must be added back.
- */
+ } else {
+ /* Refresh internal rules. */
ret = mlx4_flow_internal(priv, error);
if (ret)
return ret;
}
 /* Toggle the remaining flow rules. */
- for (flow = LIST_FIRST(&priv->flows);
- flow;
- flow = LIST_NEXT(flow, next)) {
+ LIST_FOREACH(flow, &priv->flows, next) {
ret = mlx4_flow_toggle(priv, flow, priv->started, error);
if (ret)
return ret;
while ((flow = LIST_FIRST(&priv->flows)))
mlx4_flow_destroy(priv->dev, flow, NULL);
+ assert(LIST_EMPTY(&priv->rss));
}
static const struct rte_flow_ops mlx4_flow_ops = {