-/**
- * DPDK callback to get flow control status.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param[out] fc_conf
- * Flow control output buffer.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
-{
- struct ifreq ifr;
- struct ethtool_pauseparam ethpause = {
- .cmd = ETHTOOL_GPAUSEPARAM
- };
- int ret;
-
- ifr.ifr_data = (void *)ðpause;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
- if (ret) {
- DRV_LOG(WARNING,
- "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
- " %s",
- dev->data->port_id, strerror(rte_errno));
- return ret;
- }
- fc_conf->autoneg = ethpause.autoneg;
- if (ethpause.rx_pause && ethpause.tx_pause)
- fc_conf->mode = RTE_FC_FULL;
- else if (ethpause.rx_pause)
- fc_conf->mode = RTE_FC_RX_PAUSE;
- else if (ethpause.tx_pause)
- fc_conf->mode = RTE_FC_TX_PAUSE;
- else
- fc_conf->mode = RTE_FC_NONE;
- return 0;
-}
-
-/**
- * DPDK callback to modify flow control parameters.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param[in] fc_conf
- * Flow control parameters.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
-{
- struct ifreq ifr;
- struct ethtool_pauseparam ethpause = {
- .cmd = ETHTOOL_SPAUSEPARAM
- };
- int ret;
-
- ifr.ifr_data = (void *)ðpause;
- ethpause.autoneg = fc_conf->autoneg;
- if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
- (fc_conf->mode & RTE_FC_RX_PAUSE))
- ethpause.rx_pause = 1;
- else
- ethpause.rx_pause = 0;
-
- if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
- (fc_conf->mode & RTE_FC_TX_PAUSE))
- ethpause.tx_pause = 1;
- else
- ethpause.tx_pause = 0;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
- if (ret) {
- DRV_LOG(WARNING,
- "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
- " failed: %s",
- dev->data->port_id, strerror(rte_errno));
- return ret;
- }
- return 0;
-}
-
-/**
- * Get PCI information from struct ibv_device.
- *
- * @param device
- * Pointer to Ethernet device structure.
- * @param[out] pci_addr
- * PCI bus address output buffer.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
- struct rte_pci_addr *pci_addr)
-{
- FILE *file;
- char line[32];
- MKSTR(path, "%s/device/uevent", device->ibdev_path);
-
- file = fopen(path, "rb");
- if (file == NULL) {
- rte_errno = errno;
- return -rte_errno;
- }
- while (fgets(line, sizeof(line), file) == line) {
- size_t len = strlen(line);
- int ret;
-
- /* Truncate long lines. */
- if (len == (sizeof(line) - 1))
- while (line[(len - 1)] != '\n') {
- ret = fgetc(file);
- if (ret == EOF)
- break;
- line[(len - 1)] = ret;
- }
- /* Extract information. */
- if (sscanf(line,
- "PCI_SLOT_NAME="
- "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
- &pci_addr->domain,
- &pci_addr->bus,
- &pci_addr->devid,
- &pci_addr->function) == 4) {
- ret = 0;
- break;
- }
- }
- fclose(file);
- return 0;
-}
-
-/**
- * Handle asynchronous removal event for entire multiport device.
- *
- * @param sh
- * Infiniband device shared context.
- */
-static void
-mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh)
-{
- uint32_t i;
-
- for (i = 0; i < sh->max_port; ++i) {
- struct rte_eth_dev *dev;
-
- if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
- /*
- * Or not existing port either no
- * handler installed for this port.
- */
- continue;
- }
- dev = &rte_eth_devices[sh->port[i].ih_port_id];
- assert(dev);
- if (dev->data->dev_conf.intr_conf.rmv)
- _rte_eth_dev_callback_process
- (dev, RTE_ETH_EVENT_INTR_RMV, NULL);
- }
-}
-
/**
 * Handle shared asynchronous events the NIC (removal event
 * and link status change). Supports multiport IB device.
 *
 * Drains the IB async event queue, acknowledging every event, and routes
 * per-port events to the owning Ethernet device.
 *
 * @param cb_arg
 *   Callback argument (struct mlx5_ibv_shared *).
 */
void
mlx5_dev_interrupt_handler(void *cb_arg)
{
	struct mlx5_ibv_shared *sh = cb_arg;
	struct ibv_async_event event;

	/* Read all message from the IB device and acknowledge them. */
	for (;;) {
		struct rte_eth_dev *dev;
		uint32_t tmp;

		if (mlx5_glue->get_async_event(sh->ctx, &event))
			break;
		/* Retrieve and check IB port index (1-based, 0 = device). */
		tmp = (uint32_t)event.element.port_num;
		if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
			/*
			 * The DEVICE_FATAL event is called once for
			 * entire device without port specifying.
			 * We should notify all existing ports.
			 */
			mlx5_glue->ack_async_event(&event);
			mlx5_dev_interrupt_device_fatal(sh);
			continue;
		}
		/*
		 * Debug-build invariant; the explicit checks below keep
		 * release builds safe when it does not hold.
		 */
		assert(tmp && (tmp <= sh->max_port));
		if (!tmp) {
			/* Unsupported device level event. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"unsupported common event (type %d)",
				event.event_type);
			continue;
		}
		if (tmp > sh->max_port) {
			/* Invalid IB port index. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"cannot handle an event (type %d)"
				"due to invalid IB port index (%u)",
				event.event_type, tmp);
			continue;
		}
		if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
			/* No handler installed. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"cannot handle an event (type %d)"
				"due to no handler installed for port %u",
				event.event_type, tmp);
			continue;
		}
		/* Retrieve ethernet device descriptor. */
		tmp = sh->port[tmp - 1].ih_port_id;
		dev = &rte_eth_devices[tmp];
		assert(dev);
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
		     event.event_type == IBV_EVENT_PORT_ERR) &&
			dev->data->dev_conf.intr_conf.lsc) {
			/* Ack before the callback to avoid event loss. */
			mlx5_glue->ack_async_event(&event);
			/* -EAGAIN: link state not settled yet, yield and retry later. */
			if (mlx5_link_update(dev, 0) == -EAGAIN) {
				usleep(0);
				continue;
			}
			_rte_eth_dev_callback_process
				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
			continue;
		}
		DRV_LOG(DEBUG,
			"port %u cannot handle an unknown event (type %d)",
			dev->data->port_id, event.event_type);
		mlx5_glue->ack_async_event(&event);
	}
}
-
/**
 * Unregister callback handler safely. The handler may be active
 * while we are trying to unregister it, in this case code -EAGAIN
 * is returned by rte_intr_callback_unregister(). This routine checks
 * the return code and tries to unregister handler again.
 *
 * @param handle
 *   interrupt handle
 * @param cb_fn
 *   pointer to callback routine
 * @param cb_arg
 *   opaque callback parameter
 */
void
mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
			      rte_intr_callback_fn cb_fn, void *cb_arg)
{
	/*
	 * Try to reduce timeout management overhead by not calling
	 * the timer related routines on the first iteration. If the
	 * unregistering succeeds on first call there will be no
	 * timer calls at all.
	 */
	uint64_t twait = 0;
	uint64_t start = 0;

	do {
		int ret;

		ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
		if (ret >= 0)
			return;
		if (ret != -EAGAIN) {
			/* Any error other than "busy" is unrecoverable here. */
			DRV_LOG(INFO, "failed to unregister interrupt"
				" handler (error: %d)", ret);
			assert(false);
			return;
		}
		if (twait) {
			struct timespec onems;

			/* Wait one millisecond and try again. */
			onems.tv_sec = 0;
			onems.tv_nsec = NS_PER_S / MS_PER_S;
			nanosleep(&onems, 0);
			/* Check whether one second elapsed. */
			if ((rte_get_timer_cycles() - start) <= twait)
				continue;
		} else {
			/*
			 * We get the amount of timer ticks for one second.
			 * If this amount elapsed it means we spent one
			 * second in waiting. This branch is executed once
			 * on first iteration.
			 */
			twait = rte_get_timer_hz();
			assert(twait);
		}
		/*
		 * Timeout elapsed, show message (once a second) and retry.
		 * We have no other acceptable option here, if we ignore
		 * the unregistering return code the handler will not
		 * be unregistered, fd will be closed and we may get the
		 * crash. Hanging and messaging in the loop seems not to be
		 * the worst choice.
		 */
		DRV_LOG(INFO, "Retrying to unregister interrupt handler");
		start = rte_get_timer_cycles();
	} while (true);
}
-
/**
 * Handle DEVX interrupts from the NIC.
 * This function is probably called from the DPDK host thread.
 *
 * Drains completed DEVX async commands and forwards each completion
 * status to the flow counter pool query handler.
 *
 * @param cb_arg
 *   Callback argument (struct mlx5_ibv_shared *).
 */
void
mlx5_dev_interrupt_handler_devx(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_ASYNC
	/* DEVX async completions not compiled in: nothing to do. */
	(void)cb_arg;
	return;
#else
	struct mlx5_ibv_shared *sh = cb_arg;
	/*
	 * The union is sized for the largest expected reply: the command
	 * header followed by a flow counter query output with its traffic
	 * counter.
	 */
	union {
		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
			    MLX5_ST_SZ_BYTES(traffic_counter) +
			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
	} out;
	/* Command output payload starts right after the async header. */
	uint8_t *buf = out.buf + sizeof(out.cmd_resp);

	/* Loop until the completion queue is drained. */
	while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
						   &out.cmd_resp,
						   sizeof(out.buf)))
		mlx5_flow_async_pool_query_handle
			(sh, (uint64_t)out.cmd_resp.wr_id,
			 mlx5_devx_get_out_command_status(buf));
#endif /* HAVE_IBV_DEVX_ASYNC */
}
-
/**
 * Uninstall shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * Drops this port's reference on the shared handler; the last reference
 * unregisters the interrupt callbacks and destroys the DEVX completion
 * channel.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	/* Interrupt handlers are only managed by the primary process. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	/* RTE_MAX_ETHPORTS in the slot means "no handler installed". */
	if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS)
		goto exit;
	assert(sh->port[priv->ibv_port - 1].ih_port_id ==
					(uint32_t)dev->data->port_id);
	assert(sh->intr_cnt);
	/* Release this port's slot. */
	sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	/* Tear the shared handler down only when the last reference drops. */
	if (!sh->intr_cnt || --sh->intr_cnt)
		goto exit;
	mlx5_intr_callback_unregister(&sh->intr_handle,
				      mlx5_dev_interrupt_handler, sh);
	sh->intr_handle.fd = 0;
	sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	if (sh->intr_handle_devx.fd) {
		rte_intr_callback_unregister(&sh->intr_handle_devx,
					     mlx5_dev_interrupt_handler_devx,
					     sh);
		sh->intr_handle_devx.fd = 0;
		sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN;
	}
	if (sh->devx_comp) {
		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
		sh->devx_comp = NULL;
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}
-
/**
 * Install shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * The first port to install registers the shared callbacks; subsequent
 * ports only take a reference. On failure, interrupts are disabled for
 * this device (lsc/rmv cleared) rather than reported to the caller.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;
	int ret;
	int flags;

	/* Interrupt handlers are only managed by the primary process. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) {
		/* The handler is already installed for this port. */
		assert(sh->intr_cnt);
		goto exit;
	}
	/* Bind this eth port to its IB port slot. */
	sh->port[priv->ibv_port - 1].ih_port_id = (uint32_t)dev->data->port_id;
	if (sh->intr_cnt) {
		/* Shared handler already registered; just add a reference. */
		sh->intr_cnt++;
		goto exit;
	}
	/* No shared handler installed. */
	assert(sh->ctx->async_fd > 0);
	/* The async fd must be non-blocking for the interrupt loop. */
	flags = fcntl(sh->ctx->async_fd, F_GETFL);
	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO, "failed to change file descriptor"
			" async event queue");
		goto error;
	}
	sh->intr_handle.fd = sh->ctx->async_fd;
	sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
	rte_intr_callback_register(&sh->intr_handle,
				   mlx5_dev_interrupt_handler, sh);
	if (priv->config.devx) {
#ifndef HAVE_IBV_DEVX_ASYNC
		/* DEVX requested but async support not compiled in. */
		goto error_unregister;
#else
		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
		if (sh->devx_comp) {
			/* DEVX completion fd must be non-blocking as well. */
			flags = fcntl(sh->devx_comp->fd, F_GETFL);
			ret = fcntl(sh->devx_comp->fd, F_SETFL,
				    flags | O_NONBLOCK);
			if (ret) {
				DRV_LOG(INFO, "failed to change file descriptor"
					" devx async event queue");
				goto error_unregister;
			}
			sh->intr_handle_devx.fd = sh->devx_comp->fd;
			sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
			rte_intr_callback_register
				(&sh->intr_handle_devx,
				 mlx5_dev_interrupt_handler_devx, sh);
		} else {
			DRV_LOG(INFO, "failed to create devx async command "
				"completion");
			goto error_unregister;
		}
#endif /* HAVE_IBV_DEVX_ASYNC */
	}
	sh->intr_cnt++;
	goto exit;
error_unregister:
	rte_intr_callback_unregister(&sh->intr_handle,
				     mlx5_dev_interrupt_handler, sh);
error:
	/* Indicate there will be no interrupts. */
	dev->data->dev_conf.intr_conf.lsc = 0;
	dev->data->dev_conf.intr_conf.rmv = 0;
	sh->intr_handle.fd = 0;
	sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	/* Mark the slot free again. */
	sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}
-
/**
 * Uninstall interrupt handler.
 *
 * Thin wrapper delegating to the shared (per-IB-device) handler teardown.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_uninstall(dev);
}
-
/**
 * Install interrupt handler.
 *
 * Thin wrapper delegating to the shared (per-IB-device) handler setup.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_install(dev);
}
-
/**
 * DPDK callback to bring the link DOWN.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_down(struct rte_eth_dev *dev)
{
	/* Clears IFF_UP on the netdev (keep-mask/flags per mlx5_set_flags). */
	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
}
-
/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	/* Sets IFF_UP on the netdev (keep-mask/flags per mlx5_set_flags). */
	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}
-
-/**
- * Configure the TX function to use.
- *
- * @param dev
- * Pointer to private data structure.
- *
- * @return
- * Pointer to selected Tx burst function.
- */
-eth_tx_burst_t
-mlx5_select_tx_function(struct rte_eth_dev *dev)
-{
- struct mlx5_priv *priv = dev->data->dev_private;
- eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
- struct mlx5_dev_config *config = &priv->config;
- uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
- int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
- DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
- DEV_TX_OFFLOAD_GRE_TNL_TSO |
- DEV_TX_OFFLOAD_IP_TNL_TSO |
- DEV_TX_OFFLOAD_UDP_TNL_TSO));
- int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
- DEV_TX_OFFLOAD_UDP_TNL_TSO |
- DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));
- int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);
-
- assert(priv != NULL);
- /* Select appropriate TX function. */
- if (vlan_insert || tso || swp)
- return tx_pkt_burst;
- if (config->mps == MLX5_MPW_ENHANCED) {
- if (mlx5_check_vec_tx_support(dev) > 0) {
- if (mlx5_check_raw_vec_tx_support(dev) > 0)
- tx_pkt_burst = mlx5_tx_burst_raw_vec;
- else
- tx_pkt_burst = mlx5_tx_burst_vec;
- DRV_LOG(DEBUG,
- "port %u selected enhanced MPW Tx vectorized"
- " function",
- dev->data->port_id);
- } else {
- tx_pkt_burst = mlx5_tx_burst_empw;
- DRV_LOG(DEBUG,
- "port %u selected enhanced MPW Tx function",
- dev->data->port_id);
- }
- } else if (config->mps && (config->txq_inline > 0)) {
- tx_pkt_burst = mlx5_tx_burst_mpw_inline;
- DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
- dev->data->port_id);
- } else if (config->mps) {
- tx_pkt_burst = mlx5_tx_burst_mpw;
- DRV_LOG(DEBUG, "port %u selected MPW Tx function",
- dev->data->port_id);
- }
- return tx_pkt_burst;
-}
-