From 598fb8aec6f6c8618a2425789e12fc352f120858 Mon Sep 17 00:00:00 2001 From: Gaetan Rivet Date: Tue, 18 Jul 2017 14:48:20 +0200 Subject: [PATCH] net/failsafe: support device removal Listen to INTR_RMV events issued by slaves. Add atomic flags on slave queues to detect use of slave bursts function. If a removal is detected, set the recollection flag on this slave. During a slave upkeep round, if its recollection flag is set and its burst functions are not in use by any thread, remove that slave. Signed-off-by: Gaetan Rivet Acked-by: Olga Shern --- doc/guides/nics/fail_safe.rst | 15 ++++ drivers/net/failsafe/failsafe.c | 1 + drivers/net/failsafe/failsafe_args.c | 13 ++++ drivers/net/failsafe/failsafe_eal.c | 3 +- drivers/net/failsafe/failsafe_ether.c | 96 +++++++++++++++++++++++-- drivers/net/failsafe/failsafe_ops.c | 37 +++++++++- drivers/net/failsafe/failsafe_private.h | 72 ++++++++++++++++--- drivers/net/failsafe/failsafe_rxtx.c | 17 ++++- 8 files changed, 234 insertions(+), 20 deletions(-) diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst index c81f06acb4..34ecae2d19 100644 --- a/doc/guides/nics/fail_safe.rst +++ b/doc/guides/nics/fail_safe.rst @@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a device underneath the Fail-safe PMD with a specific feature, this feature must be supported by the Fail-safe PMD to avoid throwing any error. +A notable exception is the device removal feature. The fail-safe PMD being a +virtual device, it cannot currently be removed in the sense of a specific bus +hotplug, like for PCI for example. It will however enable this feature for its +sub-device automatically, detecting those that are capable and register the +relevant callback for such event. + Check the feature matrix for the complete set of supported features. Compilation option @@ -179,6 +185,14 @@ apply it upon the probing of its missing sub-device. After this configuration pass, the new sub-device will be synchronized with other sub-devices, i.e. be started if the fail-safe PMD has been started by the user before. +Plug-out feature +---------------- + +A sub-device supporting the device removal event can be removed from its bus at +any time. The fail-safe PMD will register a callback for such event and react +accordingly. It will try to safely stop, close and uninit the sub-device having +emitted this event, allowing it to free its eventual resources. + Fail-safe glossary ------------------ @@ -195,6 +209,7 @@ Preferred device : Primary device Upkeep round Periodical process when slaves are serviced. Each devices having a state different to that of the fail-safe device itself, is synchronized with it. + Additionally, each slave having the remove flag set are cleaned-up. Slave In the context of the fail-safe PMD, synonymous to sub-device. diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 12d79ac692..7628c21659 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -142,6 +142,7 @@ fs_hotplug_alarm(void *arg) if (ret) ERROR("Unable to synchronize sub_device state"); } + failsafe_dev_remove(dev); ret = failsafe_hotplug_alarm_install(dev); if (ret) ERROR("Unable to set up next alarm"); diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c index ec3db553f1..932e371f94 100644 --- a/drivers/net/failsafe/failsafe_args.c +++ b/drivers/net/failsafe/failsafe_args.c @@ -436,6 +436,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev, dev, params); } +static int +fs_parse_sub_device(struct sub_device *sdev) +{ + struct rte_devargs *da; + char devstr[DEVARGS_MAXLEN] = ""; + + da = &sdev->devargs; + snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args); + return fs_parse_device(sdev, devstr); +} + int failsafe_args_parse_subs(struct rte_eth_dev *dev) { @@ -448,6 +459,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev) continue; if (sdev->cmdline) ret = fs_execute_cmd(sdev, sdev->cmdline); + else + ret = fs_parse_sub_device(sdev); if (ret == 0) sdev->state = DEV_PARSED; } diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index d11ea67faf..c8f4318e17 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -63,6 +63,7 @@ fs_bus_init(struct rte_eth_dev *dev) return -ENODEV; } SUB_ID(sdev) = i; + sdev->fs_dev = dev; sdev->dev = ETH(sdev)->device; ETH(sdev)->state = RTE_ETH_DEV_DEFERRED; sdev->state = DEV_PROBED; @@ -80,7 +81,7 @@ failsafe_eal_init(struct rte_eth_dev *dev) return ret; if (PRIV(dev)->state < DEV_PROBED) PRIV(dev)->state = DEV_PROBED; - fs_switch_dev(dev); + fs_switch_dev(dev, NULL); return 0; } diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 2958207a69..359b8c969e 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -250,6 +250,67 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev, return 0; } +static void +fs_dev_remove(struct sub_device *sdev) +{ + int ret; + + if (sdev == NULL) + return; + switch (sdev->state) { + case DEV_STARTED: + rte_eth_dev_stop(PORT_ID(sdev)); + sdev->state = DEV_ACTIVE; + /* fallthrough */ + case DEV_ACTIVE: + rte_eth_dev_close(PORT_ID(sdev)); + sdev->state = DEV_PROBED; + /* fallthrough */ + case DEV_PROBED: + ret = rte_eal_hotplug_remove(sdev->bus->name, + sdev->dev->name); + if (ret) { + ERROR("Bus detach failed for sub_device %u", + SUB_ID(sdev)); + } else { + ETH(sdev)->state = RTE_ETH_DEV_UNUSED; + } + sdev->state = DEV_PARSED; + /* fallthrough */ + case DEV_PARSED: + case DEV_UNDEFINED: + sdev->state = DEV_UNDEFINED; + /* the end */ + break; + } + failsafe_hotplug_alarm_install(sdev->fs_dev); +} + +static inline int +fs_rxtx_clean(struct sub_device *sdev) +{ + uint16_t i; + + for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++) + if (FS_ATOMIC_RX(sdev, i)) + return 0; + for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++) + if (FS_ATOMIC_TX(sdev, i)) + return 0; + return 1; +} + +void +failsafe_dev_remove(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) + if (sdev->remove && fs_rxtx_clean(sdev)) + fs_dev_remove(sdev); +} + int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) { @@ -263,13 +324,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) ret = failsafe_args_parse_subs(dev); if (ret) - return ret; + goto err_remove; if (PRIV(dev)->state < DEV_PROBED) return 0; ret = failsafe_eal_init(dev); if (ret) - return ret; + goto err_remove; if (PRIV(dev)->state < DEV_ACTIVE) return 0; inactive = 0; @@ -278,15 +339,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) inactive |= UINT32_C(1) << i; ret = dev->dev_ops->dev_configure(dev); if (ret) - return ret; + goto err_remove; FOREACH_SUBDEV(sdev, i, dev) { if (inactive & (UINT32_C(1) << i)) { ret = fs_eth_dev_conf_apply(dev, sdev); if (ret) { ERROR("Could not apply configuration to sub_device %d", i); - /* TODO: disable device */ - return ret; + goto err_remove; } } } @@ -300,6 +360,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) return 0; ret = dev->dev_ops->dev_start(dev); if (ret) - return ret; + goto err_remove; + return 0; +err_remove: + FOREACH_SUBDEV(sdev, i, dev) + if (sdev->state != PRIV(dev)->state) + sdev->remove = 1; + return ret; +} + +int +failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused, + enum rte_eth_event_type event __rte_unused, + void *cb_arg, void *out __rte_unused) +{ + struct sub_device *sdev = cb_arg; + + /* Switch as soon as possible tx_dev. */ + fs_switch_dev(sdev->fs_dev, sdev); + /* Use safe bursts in any case. */ + set_burst_fn(sdev->fs_dev, 1); + /* + * Async removal, the sub-PMD will try to unregister + * the callback at the source of the current thread context. + */ + sdev->remove = 1; return 0; } diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index ac3257e30a..e8edf15f43 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -204,9 +205,21 @@ fs_dev_configure(struct rte_eth_dev *dev) } } FOREACH_SUBDEV(sdev, i, dev) { + int rmv_interrupt = 0; + if (sdev->state != DEV_PROBED) continue; + + rmv_interrupt = ETH(sdev)->data->dev_flags & + RTE_ETH_DEV_INTR_RMV; + if (rmv_interrupt) { + DEBUG("Enabling RMV interrupts for sub_device %d", i); + dev->data->dev_conf.intr_conf.rmv = 1; + } else { + DEBUG("sub_device %d does not support RMV event", i); + } DEBUG("Configuring sub-device %d", i); + sdev->remove = 0; ret = rte_eth_dev_configure(PORT_ID(sdev), dev->data->nb_rx_queues, dev->data->nb_tx_queues, @@ -215,6 +228,16 @@ fs_dev_configure(struct rte_eth_dev *dev) ERROR("Could not configure sub_device %d", i); return ret; } + if (rmv_interrupt) { + ret = rte_eth_dev_callback_register(PORT_ID(sdev), + RTE_ETH_EVENT_INTR_RMV, + failsafe_eth_rmv_event_callback, + sdev); + if (ret) + WARN("Failed to register RMV callback for sub_device %d", + SUB_ID(sdev)); + } + dev->data->dev_conf.intr_conf.rmv = 0; sdev->state = DEV_ACTIVE; } if (PRIV(dev)->state < DEV_ACTIVE) @@ -240,7 +263,7 @@ fs_dev_start(struct rte_eth_dev *dev) } if (PRIV(dev)->state < DEV_STARTED) PRIV(dev)->state = DEV_STARTED; - fs_switch_dev(dev); + fs_switch_dev(dev, NULL); return 0; } @@ -351,10 +374,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev, fs_rx_queue_release(rxq); dev->data->rx_queues[rx_queue_id] = NULL; } - rxq = rte_zmalloc(NULL, sizeof(*rxq), + rxq = rte_zmalloc(NULL, + sizeof(*rxq) + + sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail, RTE_CACHE_LINE_SIZE); if (rxq == NULL) return -ENOMEM; + FOREACH_SUBDEV(sdev, i, dev) + rte_atomic64_init(&rxq->refcnt[i]); rxq->qid = rx_queue_id; rxq->socket_id = socket_id; rxq->info.mp = mb_pool; @@ -414,10 +441,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev, fs_tx_queue_release(txq); dev->data->tx_queues[tx_queue_id] = NULL; } - txq = rte_zmalloc("ethdev TX queue", sizeof(*txq), + txq = rte_zmalloc("ethdev TX queue", + sizeof(*txq) + + sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail, RTE_CACHE_LINE_SIZE); if (txq == NULL) return -ENOMEM; + FOREACH_SUBDEV(sdev, i, dev) + rte_atomic64_init(&txq->refcnt[i]); txq->qid = tx_queue_id; txq->socket_id = socket_id; txq->info.conf = *tx_conf; diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index aac47542bc..80bbc613d6 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -36,6 +36,7 @@ #include +#include #include #include #include @@ -65,6 +66,7 @@ struct rxq { uint8_t last_polled; unsigned int socket_id; struct rte_eth_rxq_info info; + rte_atomic64_t refcnt[]; }; struct txq { @@ -72,6 +74,7 @@ struct txq { uint16_t qid; unsigned int socket_id; struct rte_eth_txq_info info; + rte_atomic64_t refcnt[]; }; struct rte_flow { @@ -101,6 +104,10 @@ struct sub_device { enum dev_state state; /* Some device are defined as a command line */ char *cmdline; + /* fail-safe device backreference */ + struct rte_eth_dev *fs_dev; + /* flag calling for recollection */ + volatile unsigned int remove:1; }; struct fs_priv { @@ -168,6 +175,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev); /* ETH_DEV */ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev); +void failsafe_dev_remove(struct rte_eth_dev *dev); +int failsafe_eth_rmv_event_callback(uint8_t port_id, + enum rte_eth_event_type type, + void *arg, void *out); /* GLOBALS */ @@ -233,6 +244,39 @@ extern int mac_from_arg; #define SUBOPS(s, ops) \ (ETH(s)->dev_ops->ops) +/** + * Atomic guard + */ + +/** + * a: (rte_atomic64_t) + */ +#define FS_ATOMIC_P(a) \ + rte_atomic64_add(&(a), 1) + +/** + * a: (rte_atomic64_t) + */ +#define FS_ATOMIC_V(a) \ + rte_atomic64_sub(&(a), 1) + +/** + * s: (struct sub_device *) + * i: uint16_t qid + */ +#define FS_ATOMIC_RX(s, i) \ + rte_atomic64_read( \ + &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ + ) +/** + * s: (struct sub_device *) + * i: uint16_t qid + */ +#define FS_ATOMIC_TX(s, i) \ + rte_atomic64_read( \ + &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ + ) + #define LOG__(level, m, ...) \ RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__) #define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n') @@ -257,33 +301,45 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid, return sid; } +/* + * Switch emitting device. + * If banned is set, banned must not be considered for + * the role of emitting device. + */ static inline void -fs_switch_dev(struct rte_eth_dev *dev) +fs_switch_dev(struct rte_eth_dev *dev, + struct sub_device *banned) { + struct sub_device *txd; enum dev_state req_state; req_state = PRIV(dev)->state; - if (PREFERRED_SUBDEV(dev)->state >= req_state) { - if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) && - (TX_SUBDEV(dev) == NULL || + txd = TX_SUBDEV(dev); + if (PREFERRED_SUBDEV(dev)->state >= req_state && + PREFERRED_SUBDEV(dev) != banned) { + if (txd != PREFERRED_SUBDEV(dev) && + (txd == NULL || (req_state == DEV_STARTED) || - (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) { + (txd && txd->state < DEV_STARTED))) { DEBUG("Switching tx_dev to preferred sub_device"); PRIV(dev)->subs_tx = 0; } - } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) || - TX_SUBDEV(dev) == NULL) { + } else if ((txd && txd->state < req_state) || + txd == NULL || + txd == banned) { struct sub_device *sdev; uint8_t i; /* Using acceptable device */ FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) { + if (sdev == banned) + continue; DEBUG("Switching tx_dev to sub_device %d", i); PRIV(dev)->subs_tx = i; break; } - } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) { + } else if (txd && txd->state < req_state) { DEBUG("No device ready, deactivating tx_dev"); PRIV(dev)->subs_tx = PRIV(dev)->subs_tail; } else { diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c index 5e380fe924..7311421538 100644 --- a/drivers/net/failsafe/failsafe_rxtx.c +++ b/drivers/net/failsafe/failsafe_rxtx.c @@ -31,6 +31,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include #include @@ -112,8 +113,10 @@ failsafe_rx_burst(void *queue, if (unlikely(fs_rx_unsafe(sdev))) continue; sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid]; + FS_ATOMIC_P(rxq->refcnt[sdev->sid]); nb_rx = ETH(sdev)-> rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts); + FS_ATOMIC_V(rxq->refcnt[sdev->sid]); if (nb_rx) { rxq->last_polled = i; return nb_rx; @@ -146,8 +149,10 @@ failsafe_rx_burst_fast(void *queue, sdev = &priv->subs[i]; RTE_ASSERT(!fs_rx_unsafe(sdev)); sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid]; + FS_ATOMIC_P(rxq->refcnt[sdev->sid]); nb_rx = ETH(sdev)-> rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts); + FS_ATOMIC_V(rxq->refcnt[sdev->sid]); if (nb_rx) { rxq->last_polled = i; return nb_rx; @@ -164,13 +169,17 @@ failsafe_tx_burst(void *queue, struct sub_device *sdev; struct txq *txq; void *sub_txq; + uint16_t nb_tx; txq = queue; sdev = TX_SUBDEV(txq->priv->dev); if (unlikely(fs_tx_unsafe(sdev))) return 0; sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; - return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_P(txq->refcnt[sdev->sid]); + nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_V(txq->refcnt[sdev->sid]); + return nb_tx; } uint16_t @@ -181,10 +190,14 @@ failsafe_tx_burst_fast(void *queue, struct sub_device *sdev; struct txq *txq; void *sub_txq; + uint16_t nb_tx; txq = queue; sdev = TX_SUBDEV(txq->priv->dev); RTE_ASSERT(!fs_tx_unsafe(sdev)); sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; - return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_P(txq->refcnt[sdev->sid]); + nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_V(txq->refcnt[sdev->sid]); + return nb_tx; } -- 2.20.1