net/mlx4: add RSS flow rule action support
author: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Thu, 12 Oct 2017 12:19:41 +0000 (14:19 +0200)
committer: Ferruh Yigit <ferruh.yigit@intel.com>
Fri, 13 Oct 2017 00:18:48 +0000 (01:18 +0100)
This patch dissociates single-queue indirection tables and hash QP objects
from Rx queue structures to relinquish their control to users through the
RSS flow rule action, while simultaneously allowing multiple queues to be
associated with RSS contexts.

Flow rules share identical RSS contexts (hashed fields, hash key, target
queues) to save on memory and other resources. The trade-off is some added
complexity due to reference counters management on RSS contexts.

The QUEUE action is re-implemented on top of an automatically-generated
single-queue RSS context.

The following hardware limitations apply to RSS contexts:

- The number of queues in a group must be a power of two.
- Queue indices must be consecutive, for instance the [0 1 2 3] set is
  allowed, however [3 2 1 0], [0 2 1 3] and [0 0 1 1 2 3 3 3] are not.
- The first queue of a group must be aligned to a multiple of the context
  size, e.g. if queues [0 1 2 3 4] are defined globally, allowed two-queue
  groups are [0 1] and [2 3]; groups [1 2] and [3 4] are not supported.
- RSS hash key, while configurable per context, must be exactly 40 bytes
  long.
- The only supported hash algorithm is Toeplitz.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
doc/guides/nics/features/mlx4.ini
drivers/net/mlx4/Makefile
drivers/net/mlx4/mlx4.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/mlx4_ethdev.c
drivers/net/mlx4/mlx4_flow.c
drivers/net/mlx4/mlx4_flow.h
drivers/net/mlx4/mlx4_rxq.c
drivers/net/mlx4/mlx4_rxtx.h
mk/rte.app.mk

index 6f8c82a..9750ebf 100644 (file)
@@ -16,6 +16,7 @@ Promiscuous mode     = Y
 Allmulticast mode    = Y
 Unicast MAC filter   = Y
 Multicast MAC filter = Y
+RSS hash             = Y
 SR-IOV               = Y
 VLAN filter          = Y
 Basic stats          = Y
index 0515cd7..3b3a020 100644 (file)
@@ -54,7 +54,7 @@ CFLAGS += -D_BSD_SOURCE
 CFLAGS += -D_DEFAULT_SOURCE
 CFLAGS += -D_XOPEN_SOURCE=600
 CFLAGS += $(WERROR_FLAGS)
-LDLIBS += -libverbs
+LDLIBS += -libverbs -lmlx4
 
 # A few warnings cannot be avoided in external headers.
 CFLAGS += -Wno-error=cast-qual
index 52f8d51..0db9a19 100644 (file)
@@ -50,6 +50,7 @@
 #pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/mlx4dv.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -99,8 +100,20 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
        struct rte_flow_error error;
+       uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
        int ret;
 
+       /* Prepare range for RSS contexts before creating the first WQ. */
+       ret = mlx4dv_set_context_attr(priv->ctx,
+                                     MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ,
+                                     &log2_range);
+       if (ret) {
+               ERROR("cannot set up range size for RSS context to %u"
+                     " (for %u Rx queues), error: %s",
+                     1 << log2_range, dev->data->nb_rx_queues, strerror(ret));
+               rte_errno = ret;
+               return -ret;
+       }
        /* Prepare internal flow rules. */
        ret = mlx4_flow_sync(priv, &error);
        if (ret) {
index b04a104..f4da8c6 100644 (file)
@@ -95,6 +95,7 @@ enum {
 #define MLX4_DRIVER_NAME "net_mlx4"
 
 struct mlx4_drop;
+struct mlx4_rss;
 struct rxq;
 struct txq;
 struct rte_flow;
@@ -114,6 +115,7 @@ struct priv {
        uint32_t isolated:1; /**< Toggle isolated mode. */
        struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
        struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
+       LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */
        LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
        struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
        /**< Configured MAC addresses. Unused entries are zeroed. */
index 661e252..3623909 100644 (file)
@@ -769,6 +769,7 @@ mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        info->tx_offload_capa = 0;
        if (mlx4_get_ifname(priv, &ifname) == 0)
                info->if_index = if_nametoindex(ifname);
+       info->hash_key_size = MLX4_RSS_HASH_KEY_SIZE;
        info->speed_capa =
                        ETH_LINK_SPEED_1G |
                        ETH_LINK_SPEED_10G |
index 41423cd..2b60d76 100644 (file)
@@ -102,6 +102,62 @@ struct mlx4_drop {
        uint32_t refcnt; /**< Reference count. */
 };
 
+/**
+ * Translate a DPDK RSS hash fields mask into its Verbs counterpart.
+ *
+ * Each DPDK hash type is folded into one or more coarse groups (IPv4, IPv6,
+ * TCP, UDP); every group hit enables the matching source/destination Verbs
+ * hash fields.
+ *
+ * @param rss_hf
+ *   Hash fields in DPDK format (see struct rte_eth_rss_conf).
+ *
+ * @return
+ *   The Verbs RSS hash fields mask on success (0 when @p rss_hf is 0),
+ *   (uint64_t)-1 with rte_errno set to ENOTSUP when @p rss_hf contains a
+ *   bit that belongs to none of the groups.
+ */
+static uint64_t
+mlx4_conv_rss_hf(uint64_t rss_hf)
+{
+       enum { IPV4, IPV6, TCP, UDP, };
+       const uint64_t in[] = {
+               [IPV4] = (ETH_RSS_IPV4 |
+                         ETH_RSS_FRAG_IPV4 |
+                         ETH_RSS_NONFRAG_IPV4_TCP |
+                         ETH_RSS_NONFRAG_IPV4_UDP |
+                         ETH_RSS_NONFRAG_IPV4_OTHER),
+               [IPV6] = (ETH_RSS_IPV6 |
+                         ETH_RSS_FRAG_IPV6 |
+                         ETH_RSS_NONFRAG_IPV6_TCP |
+                         ETH_RSS_NONFRAG_IPV6_UDP |
+                         ETH_RSS_NONFRAG_IPV6_OTHER |
+                         ETH_RSS_IPV6_EX |
+                         ETH_RSS_IPV6_TCP_EX |
+                         ETH_RSS_IPV6_UDP_EX),
+               [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP |
+                        ETH_RSS_NONFRAG_IPV6_TCP |
+                        ETH_RSS_IPV6_TCP_EX),
+               [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP |
+                        ETH_RSS_NONFRAG_IPV6_UDP |
+                        ETH_RSS_IPV6_UDP_EX),
+       };
+       const uint64_t out[RTE_DIM(in)] = {
+               [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
+               [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
+               [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
+               [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
+       };
+       uint64_t handled = 0;
+       uint64_t verbs = 0;
+       unsigned int n;
+
+       for (n = 0; n < RTE_DIM(in); ++n) {
+               const uint64_t hit = rss_hf & in[n];
+
+               if (!hit)
+                       continue;
+               /* Remember which requested bits this group accounts for. */
+               handled |= hit;
+               verbs |= out[n];
+       }
+       if (rss_hf & ~handled) {
+               /* At least one requested bit has no Verbs equivalent. */
+               rte_errno = ENOTSUP;
+               return (uint64_t)-1;
+       }
+       return verbs;
+}
+
 /**
  * Merge Ethernet pattern item into flow rule handle.
  *
@@ -663,6 +719,9 @@ fill:
        for (action = actions; action->type; ++action) {
                switch (action->type) {
                        const struct rte_flow_action_queue *queue;
+                       const struct rte_flow_action_rss *rss;
+                       const struct rte_eth_rss_conf *rss_conf;
+                       unsigned int i;
 
                case RTE_FLOW_ACTION_TYPE_VOID:
                        continue;
@@ -670,23 +729,87 @@ fill:
                        flow->drop = 1;
                        break;
                case RTE_FLOW_ACTION_TYPE_QUEUE:
+                       if (flow->rss)
+                               break;
                        queue = action->conf;
-                       if (queue->index >= priv->dev->data->nb_rx_queues)
+                       flow->rss = mlx4_rss_get
+                               (priv, 0, mlx4_rss_hash_key_default, 1,
+                                &queue->index);
+                       if (!flow->rss) {
+                               msg = "not enough resources for additional"
+                                       " single-queue RSS context";
+                               goto exit_action_not_supported;
+                       }
+                       break;
+               case RTE_FLOW_ACTION_TYPE_RSS:
+                       if (flow->rss)
+                               break;
+                       rss = action->conf;
+                       /* Default RSS configuration if none is provided. */
+                       rss_conf =
+                               rss->rss_conf ?
+                               rss->rss_conf :
+                               &(struct rte_eth_rss_conf){
+                                       .rss_key = mlx4_rss_hash_key_default,
+                                       .rss_key_len = MLX4_RSS_HASH_KEY_SIZE,
+                                       .rss_hf = (ETH_RSS_IPV4 |
+                                                  ETH_RSS_NONFRAG_IPV4_UDP |
+                                                  ETH_RSS_NONFRAG_IPV4_TCP |
+                                                  ETH_RSS_IPV6 |
+                                                  ETH_RSS_NONFRAG_IPV6_UDP |
+                                                  ETH_RSS_NONFRAG_IPV6_TCP),
+                               };
+                       /* Sanity checks. */
+                       if (!rte_is_power_of_2(rss->num)) {
+                               msg = "for RSS, mlx4 requires the number of"
+                                       " queues to be a power of two";
+                               goto exit_action_not_supported;
+                       }
+                       if (rss_conf->rss_key_len !=
+                           sizeof(flow->rss->key)) {
+                               msg = "mlx4 supports exactly one RSS hash key"
+                                       " length: "
+                                       MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
+                               goto exit_action_not_supported;
+                       }
+                       for (i = 1; i < rss->num; ++i)
+                               if (rss->queue[i] - rss->queue[i - 1] != 1)
+                                       break;
+                       if (i != rss->num) {
+                               msg = "mlx4 requires RSS contexts to use"
+                                       " consecutive queue indices only";
+                               goto exit_action_not_supported;
+                       }
+                       if (rss->queue[0] % rss->num) {
+                               msg = "mlx4 requires the first queue of a RSS"
+                                       " context to be aligned on a multiple"
+                                       " of the context size";
+                               goto exit_action_not_supported;
+                       }
+                       flow->rss = mlx4_rss_get
+                               (priv, mlx4_conv_rss_hf(rss_conf->rss_hf),
+                                rss_conf->rss_key, rss->num, rss->queue);
+                       if (!flow->rss) {
+                               msg = "either invalid parameters or not enough"
+                                       " resources for additional multi-queue"
+                                       " RSS context";
                                goto exit_action_not_supported;
-                       flow->queue = 1;
-                       flow->queue_id = queue->index;
+                       }
                        break;
                default:
                        goto exit_action_not_supported;
                }
        }
-       if (!flow->queue && !flow->drop)
+       if (!flow->rss && !flow->drop)
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                         NULL, "no valid action");
        /* Validation ends here. */
-       if (!addr)
+       if (!addr) {
+               if (flow->rss)
+                       mlx4_rss_put(flow->rss);
                return 0;
+       }
        if (flow == &temp) {
                /* Allocate proper handle based on collected data. */
                const struct mlx4_malloc_vec vec[] = {
@@ -711,6 +834,7 @@ fill:
                *flow = (struct rte_flow){
                        .ibv_attr = temp.ibv_attr,
                        .ibv_attr_size = sizeof(*flow->ibv_attr),
+                       .rss = temp.rss,
                };
                *flow->ibv_attr = (struct ibv_flow_attr){
                        .type = IBV_FLOW_ATTR_NORMAL,
@@ -727,7 +851,7 @@ exit_item_not_supported:
                                  item, msg ? msg : "item not supported");
 exit_action_not_supported:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-                                 action, "action not supported");
+                                 action, msg ? msg : "action not supported");
 }
 
 /**
@@ -850,6 +974,8 @@ mlx4_flow_toggle(struct priv *priv,
                flow->ibv_flow = NULL;
                if (flow->drop)
                        mlx4_drop_put(priv->drop);
+               else if (flow->rss)
+                       mlx4_rss_detach(flow->rss);
                return 0;
        }
        assert(flow->ibv_attr);
@@ -861,6 +987,8 @@ mlx4_flow_toggle(struct priv *priv,
                        flow->ibv_flow = NULL;
                        if (flow->drop)
                                mlx4_drop_put(priv->drop);
+                       else if (flow->rss)
+                               mlx4_rss_detach(flow->rss);
                }
                err = EACCES;
                msg = ("priority level "
@@ -868,24 +996,42 @@ mlx4_flow_toggle(struct priv *priv,
                       " is reserved when not in isolated mode");
                goto error;
        }
-       if (flow->queue) {
-               struct rxq *rxq = NULL;
+       if (flow->rss) {
+               struct mlx4_rss *rss = flow->rss;
+               int missing = 0;
+               unsigned int i;
 
-               if (flow->queue_id < priv->dev->data->nb_rx_queues)
-                       rxq = priv->dev->data->rx_queues[flow->queue_id];
+               /* Stop at the first nonexistent target queue. */
+               for (i = 0; i != rss->queues; ++i)
+                       if (rss->queue_id[i] >=
+                           priv->dev->data->nb_rx_queues ||
+                           !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+                               missing = 1;
+                               break;
+                       }
                if (flow->ibv_flow) {
-                       if (!rxq ^ !flow->drop)
+                       if (missing ^ !flow->drop)
                                return 0;
                        /* Verbs flow needs updating. */
                        claim_zero(ibv_destroy_flow(flow->ibv_flow));
                        flow->ibv_flow = NULL;
                        if (flow->drop)
                                mlx4_drop_put(priv->drop);
+                       else
+                               mlx4_rss_detach(rss);
+               }
+               if (!missing) {
+                       err = mlx4_rss_attach(rss);
+                       if (err) {
+                               err = -err;
+                               msg = "cannot create indirection table or hash"
+                                       " QP to associate flow rule with";
+                               goto error;
+                       }
+                       qp = rss->qp;
                }
-               if (rxq)
-                       qp = rxq->qp;
                /* A missing target queue drops traffic implicitly. */
-               flow->drop = !rxq;
+               flow->drop = missing;
        }
        if (flow->drop) {
                mlx4_drop_get(priv);
@@ -904,6 +1050,8 @@ mlx4_flow_toggle(struct priv *priv,
                return 0;
        if (flow->drop)
                mlx4_drop_put(priv->drop);
+       else if (flow->rss)
+               mlx4_rss_detach(flow->rss);
        err = errno;
        msg = "flow rule rejected by device";
 error:
@@ -946,6 +1094,8 @@ mlx4_flow_create(struct rte_eth_dev *dev,
                }
                return flow;
        }
+       if (flow->rss)
+               mlx4_rss_put(flow->rss);
        rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                           error->message);
        rte_free(flow);
@@ -992,6 +1142,8 @@ mlx4_flow_destroy(struct rte_eth_dev *dev,
        if (err)
                return err;
        LIST_REMOVE(flow, next);
+       if (flow->rss)
+               mlx4_rss_put(flow->rss);
        rte_free(flow);
        return 0;
 }
@@ -1320,6 +1472,7 @@ mlx4_flow_clean(struct priv *priv)
 
        while ((flow = LIST_FIRST(&priv->flows)))
                mlx4_flow_destroy(priv->dev, flow, NULL);
+       assert(LIST_EMPTY(&priv->rss));
 }
 
 static const struct rte_flow_ops mlx4_flow_ops = {
index 134e14d..651fd37 100644 (file)
@@ -70,8 +70,7 @@ struct rte_flow {
        uint32_t promisc:1; /**< This rule matches everything. */
        uint32_t allmulti:1; /**< This rule matches all multicast traffic. */
        uint32_t drop:1; /**< This rule drops packets. */
-       uint32_t queue:1; /**< Target is a receive queue. */
-       uint16_t queue_id; /**< Target queue. */
+       struct mlx4_rss *rss; /**< Rx target. */
 };
 
 /* mlx4_flow.c */
index 171fe3f..483fe9b 100644 (file)
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
+/**
+ * Default RSS hash key (MLX4_RSS_HASH_KEY_SIZE bytes).
+ *
+ * This is the key mlx4 historically used in Linux, before v3.19 switched
+ * to random keys generated through netdev_rss_key_fill().
+ *
+ * This PMD keeps it for consistency with past DPDK releases; users can now
+ * override it through their own configuration.
+ *
+ * Note: deliberately not const, to work around API quirks.
+ */
+uint8_t
+mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = {
+       0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
+       0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
+       0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
+       0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
+       0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a,
+};
+
+/**
+ * Look up or create a RSS context with the requested properties.
+ *
+ * Called when a flow rule targeting one or several Rx queues is created.
+ *
+ * A context already registered on @p priv whose hash fields, hash key and
+ * target queues all match is shared: its reference count is incremented
+ * and it is returned as is. Otherwise a new context holding one reference
+ * is allocated and inserted at the head of the list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param fields
+ *   Fields for RSS processing (Verbs format).
+ * @param[in] key
+ *   Hash key to use (whose size is exactly MLX4_RSS_HASH_KEY_SIZE).
+ * @param queues
+ *   Number of target queues.
+ * @param[in] queue_id
+ *   Target queues.
+ *
+ * @return
+ *   Pointer to RSS context on success, NULL otherwise and rte_errno is set.
+ */
+struct mlx4_rss *
+mlx4_rss_get(struct priv *priv, uint64_t fields,
+            uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
+            uint16_t queues, const uint16_t queue_id[])
+{
+       const size_t ids_len = sizeof(queue_id[0]) * queues;
+       struct mlx4_rss *ctx;
+
+       /* Share an existing context when every property matches. */
+       LIST_FOREACH(ctx, &priv->rss, next) {
+               if (fields != ctx->fields || queues != ctx->queues)
+                       continue;
+               if (memcmp(key, ctx->key, MLX4_RSS_HASH_KEY_SIZE))
+                       continue;
+               if (memcmp(queue_id, ctx->queue_id, ids_len))
+                       continue;
+               ++ctx->refcnt;
+               return ctx;
+       }
+       /* The queue list is stored right after the fixed-size part. */
+       ctx = rte_malloc(__func__,
+                        offsetof(struct mlx4_rss, queue_id) + ids_len, 0);
+       if (!ctx) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+       *ctx = (struct mlx4_rss){
+               .priv = priv,
+               .refcnt = 1,
+               .usecnt = 0,
+               .qp = NULL,
+               .ind = NULL,
+               .fields = fields,
+               .queues = queues,
+       };
+       memcpy(ctx->key, key, MLX4_RSS_HASH_KEY_SIZE);
+       memcpy(ctx->queue_id, queue_id, ids_len);
+       LIST_INSERT_HEAD(&priv->rss, ctx, next);
+       return ctx;
+}
+
+/**
+ * Drop one reference on a RSS context.
+ *
+ * Called when a flow rule targeting one or several Rx queues is destroyed.
+ *
+ * The context is unlinked from its list and freed once its reference count
+ * reaches 0. It must no longer be in use by then: every prior call to
+ * mlx4_rss_attach() must have been balanced by mlx4_rss_detach().
+ *
+ * @param rss
+ *   RSS context to release.
+ */
+void mlx4_rss_put(struct mlx4_rss *rss)
+{
+       assert(rss->refcnt);
+       rss->refcnt -= 1;
+       if (rss->refcnt != 0)
+               return;
+       /* Last reference gone: Verbs objects must already be released. */
+       assert(!rss->usecnt);
+       assert(!rss->qp);
+       assert(!rss->ind);
+       LIST_REMOVE(rss, next);
+       rte_free(rss);
+}
+
+/**
+ * Attach a user to a RSS context instance.
+ *
+ * Used when the RSS QP and indirection table objects must be instantiated,
+ * that is, when a flow rule must be enabled.
+ *
+ * This function increments the usage count of the context. Only the first
+ * user triggers the creation of the indirection table and hash QP;
+ * subsequent users share them.
+ *
+ * On failure the usage count is restored and any Verbs object created by
+ * the failed attempt is destroyed, leaving @p rss->qp and @p rss->ind set
+ * to NULL so that a later attempt or mlx4_rss_put() starts from a clean
+ * state.
+ *
+ * @param rss
+ *   RSS context to attach to.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set
+ *   (currently always EINVAL).
+ */
+int mlx4_rss_attach(struct mlx4_rss *rss)
+{
+       assert(rss->refcnt);
+       if (rss->usecnt++) {
+               /* Objects were already created for a previous user. */
+               assert(rss->qp);
+               assert(rss->ind);
+               return 0;
+       }
+
+       struct ibv_wq *ind_tbl[rss->queues];
+       struct priv *priv = rss->priv;
+       const char *msg;
+       unsigned int i;
+       int ret;
+
+       if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) {
+               msg = "number of RSS queues must be a power of two";
+               goto error;
+       }
+       /* Collect the work queue of each target Rx queue. */
+       for (i = 0; i != RTE_DIM(ind_tbl); ++i) {
+               uint16_t id = rss->queue_id[i];
+               struct rxq *rxq = NULL;
+
+               if (id < priv->dev->data->nb_rx_queues)
+                       rxq = priv->dev->data->rx_queues[id];
+               if (!rxq) {
+                       msg = "RSS target queue is not configured";
+                       goto error;
+               }
+               ind_tbl[i] = rxq->wq;
+       }
+       rss->ind = ibv_create_rwq_ind_table
+               (priv->ctx,
+                &(struct ibv_rwq_ind_table_init_attr){
+                       .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
+                       .ind_tbl = ind_tbl,
+                       .comp_mask = 0,
+                });
+       if (!rss->ind) {
+               msg = "RSS indirection table creation failure";
+               goto error;
+       }
+       rss->qp = ibv_create_qp_ex
+               (priv->ctx,
+                &(struct ibv_qp_init_attr_ex){
+                       .comp_mask = (IBV_QP_INIT_ATTR_PD |
+                                     IBV_QP_INIT_ATTR_RX_HASH |
+                                     IBV_QP_INIT_ATTR_IND_TABLE),
+                       .qp_type = IBV_QPT_RAW_PACKET,
+                       .pd = priv->pd,
+                       .rwq_ind_tbl = rss->ind,
+                       .rx_hash_conf = {
+                               .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+                               .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
+                               .rx_hash_key = rss->key,
+                               .rx_hash_fields_mask = rss->fields,
+                       },
+                });
+       if (!rss->qp) {
+               msg = "RSS hash QP creation failure";
+               goto error;
+       }
+       ret = ibv_modify_qp
+               (rss->qp,
+                &(struct ibv_qp_attr){
+                       .qp_state = IBV_QPS_INIT,
+                       .port_num = priv->port,
+                },
+                IBV_QP_STATE | IBV_QP_PORT);
+       if (ret) {
+               msg = "failed to switch RSS hash QP to INIT state";
+               goto error;
+       }
+       ret = ibv_modify_qp
+               (rss->qp,
+                &(struct ibv_qp_attr){
+                       .qp_state = IBV_QPS_RTR,
+                },
+                IBV_QP_STATE);
+       if (ret) {
+               msg = "failed to switch RSS hash QP to RTR state";
+               goto error;
+       }
+       return 0;
+error:
+       /*
+        * Destroy whatever the failed attempt managed to create (QP first
+        * since it was created on top of the indirection table), otherwise
+        * these objects would leak and be overwritten by the next attempt.
+        */
+       if (rss->qp) {
+               claim_zero(ibv_destroy_qp(rss->qp));
+               rss->qp = NULL;
+       }
+       if (rss->ind) {
+               claim_zero(ibv_destroy_rwq_ind_table(rss->ind));
+               rss->ind = NULL;
+       }
+       ERROR("mlx4: %s", msg);
+       --rss->usecnt;
+       rte_errno = EINVAL;
+       return -rte_errno;
+}
+
+/**
+ * Detach a user from a RSS context instance.
+ *
+ * Called when a flow rule is disabled (as opposed to destroyed).
+ *
+ * The usage count of the context is decremented; once it reaches 0, the
+ * hash QP and then the indirection table are destroyed and their pointers
+ * reset to NULL. The context itself remains allocated.
+ *
+ * @param rss
+ *   RSS context to detach from.
+ */
+void mlx4_rss_detach(struct mlx4_rss *rss)
+{
+       assert(rss->refcnt);
+       assert(rss->qp);
+       assert(rss->ind);
+       rss->usecnt -= 1;
+       if (rss->usecnt != 0)
+               return;
+       /* No user left, release Verbs objects. */
+       claim_zero(ibv_destroy_qp(rss->qp));
+       rss->qp = NULL;
+       claim_zero(ibv_destroy_rwq_ind_table(rss->ind));
+       rss->ind = NULL;
+}
+
 /**
  * Allocate Rx queue elements.
  *
@@ -295,57 +531,6 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
-       rxq->ind = ibv_create_rwq_ind_table
-               (priv->ctx,
-                &(struct ibv_rwq_ind_table_init_attr){
-                       .log_ind_tbl_size = 0,
-                       .ind_tbl = (struct ibv_wq *[]){
-                               rxq->wq,
-                       },
-                       .comp_mask = 0,
-                });
-       if (!rxq->ind) {
-               rte_errno = errno ? errno : EINVAL;
-               ERROR("%p: indirection table creation failure: %s",
-                     (void *)dev, strerror(errno));
-               goto error;
-       }
-       rxq->qp = ibv_create_qp_ex
-               (priv->ctx,
-                &(struct ibv_qp_init_attr_ex){
-                       .comp_mask = (IBV_QP_INIT_ATTR_PD |
-                                     IBV_QP_INIT_ATTR_RX_HASH |
-                                     IBV_QP_INIT_ATTR_IND_TABLE),
-                       .qp_type = IBV_QPT_RAW_PACKET,
-                       .pd = priv->pd,
-                       .rwq_ind_tbl = rxq->ind,
-                       .rx_hash_conf = {
-                               .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
-                               .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
-                               .rx_hash_key =
-                                       (uint8_t [MLX4_RSS_HASH_KEY_SIZE]){ 0 },
-                               .rx_hash_fields_mask = 0,
-                       },
-                });
-       if (!rxq->qp) {
-               rte_errno = errno ? errno : EINVAL;
-               ERROR("%p: QP creation failure: %s",
-                     (void *)dev, strerror(rte_errno));
-               goto error;
-       }
-       ret = ibv_modify_qp
-               (rxq->qp,
-                &(struct ibv_qp_attr){
-                       .qp_state = IBV_QPS_INIT,
-                       .port_num = priv->port,
-                },
-                IBV_QP_STATE | IBV_QP_PORT);
-       if (ret) {
-               rte_errno = ret;
-               ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-                     (void *)dev, strerror(rte_errno));
-               goto error;
-       }
        ret = mlx4_rxq_alloc_elts(rxq);
        if (ret) {
                ERROR("%p: RXQ allocation failed: %s",
@@ -361,18 +546,6 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                      strerror(rte_errno));
                goto error;
        }
-       ret = ibv_modify_qp
-               (rxq->qp,
-                &(struct ibv_qp_attr){
-                       .qp_state = IBV_QPS_RTR,
-                },
-                IBV_QP_STATE);
-       if (ret) {
-               rte_errno = ret;
-               ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-                     (void *)dev, strerror(rte_errno));
-               goto error;
-       }
        DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq);
        dev->data->rx_queues[idx] = rxq;
        /* Enable associated flows. */
@@ -417,10 +590,6 @@ mlx4_rx_queue_release(void *dpdk_rxq)
                }
        mlx4_flow_sync(priv, NULL);
        mlx4_rxq_free_elts(rxq);
-       if (rxq->qp)
-               claim_zero(ibv_destroy_qp(rxq->qp));
-       if (rxq->ind)
-               claim_zero(ibv_destroy_rwq_ind_table(rxq->ind));
        if (rxq->wq)
                claim_zero(ibv_destroy_wq(rxq->wq));
        if (rxq->cq)
index 897fd2a..eca966f 100644 (file)
@@ -35,6 +35,7 @@
 #define MLX4_RXTX_H_
 
 #include <stdint.h>
+#include <sys/queue.h>
 
 /* Verbs headers do not support -pedantic. */
 #ifdef PEDANTIC
@@ -74,8 +75,6 @@ struct rxq {
        struct ibv_mr *mr; /**< Memory region (for mp). */
        struct ibv_cq *cq; /**< Completion queue. */
        struct ibv_wq *wq; /**< Work queue. */
-       struct ibv_rwq_ind_table *ind; /**< Indirection table. */
-       struct ibv_qp *qp; /**< Queue pair. */
        struct ibv_comp_channel *channel; /**< Rx completion channel. */
        unsigned int port_id; /**< Port ID for incoming packets. */
        unsigned int elts_n; /**< (*elts)[] length. */
@@ -86,6 +85,20 @@ struct rxq {
        uint8_t data[]; /**< Remaining queue resources. */
 };
 
+/**
+ * Shared flow target for Rx queues.
+ *
+ * Instances are obtained through mlx4_rss_get() and released through
+ * mlx4_rss_put(); they are allocated with room for @p queues trailing
+ * entries in @p queue_id.
+ */
+struct mlx4_rss {
+       LIST_ENTRY(mlx4_rss) next; /**< Next entry in list. */
+       struct priv *priv; /**< Back pointer to private data. */
+       uint32_t refcnt; /**< References held through mlx4_rss_get(). */
+       uint32_t usecnt; /**< Number of users relying on @p qp and @p ind. */
+       struct ibv_qp *qp; /**< Hash QP, NULL unless attached. */
+       struct ibv_rwq_ind_table *ind; /**< Ind. table, NULL unless attached. */
+       uint64_t fields; /**< Fields for RSS processing (Verbs format). */
+       uint8_t key[MLX4_RSS_HASH_KEY_SIZE]; /**< Hash key to use. */
+       uint16_t queues; /**< Number of entries in @p queue_id. */
+       uint16_t queue_id[]; /**< Target Rx queue indices. */
+};
+
 /** Tx element. */
 struct txq_elt {
        struct ibv_send_wr wr; /**< Work request. */
@@ -126,6 +139,13 @@ struct txq {
 
 /* mlx4_rxq.c */
 
+/* Declaration only: extern avoids a tentative definition per includer. */
+extern uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
+struct mlx4_rss *mlx4_rss_get(struct priv *priv, uint64_t fields,
+                             uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
+                             uint16_t queues, const uint16_t queue_id[]);
+void mlx4_rss_put(struct mlx4_rss *rss);
+int mlx4_rss_attach(struct mlx4_rss *rss);
+void mlx4_rss_detach(struct mlx4_rss *rss);
 int mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
                        uint16_t desc, unsigned int socket,
                        const struct rte_eth_rxconf *conf,
index 83e042e..8192b98 100644 (file)
@@ -135,7 +135,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
 endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD)        += -lrte_pmd_lio
-_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -libverbs
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -libverbs -lmlx4
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs -lmlx5
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD)       += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk
 _LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD)        += -lrte_pmd_nfp