net/ena: linearize Tx mbuf
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 1b8fc0f..cdefcd3 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -204,7 +204,8 @@ static const struct rte_pci_id pci_id_ena_map[] = {
 static struct ena_aenq_handlers aenq_handlers;
 
 static int ena_device_init(struct ena_com_dev *ena_dev,
-                          struct ena_com_dev_get_features_ctx *get_feat_ctx);
+                          struct ena_com_dev_get_features_ctx *get_feat_ctx,
+                          bool *wd_state);
 static int ena_dev_configure(struct rte_eth_dev *dev);
 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                                  uint16_t nb_pkts);
@@ -225,6 +226,7 @@ static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 static int ena_start(struct rte_eth_dev *dev);
 static void ena_stop(struct rte_eth_dev *dev);
 static void ena_close(struct rte_eth_dev *dev);
+static int ena_dev_reset(struct rte_eth_dev *dev);
 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
 static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
 static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
@@ -248,6 +250,7 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
                              uint16_t reta_size);
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
 static void ena_interrupt_handler_rte(void *cb_arg);
+static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
 
 static const struct eth_dev_ops ena_dev_ops = {
        .dev_configure        = ena_dev_configure,
@@ -262,6 +265,7 @@ static const struct eth_dev_ops ena_dev_ops = {
        .rx_queue_release     = ena_rx_queue_release,
        .tx_queue_release     = ena_tx_queue_release,
        .dev_close            = ena_close,
+       .dev_reset            = ena_dev_reset,
        .reta_update          = ena_rss_reta_update,
        .reta_query           = ena_rss_reta_query,
 };
@@ -364,6 +368,19 @@ static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
        }
 }
 
+static inline int validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
+{
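+       /* A req_id outside the ring means the device returned a corrupted
+        * completion descriptor; request a device reset.
+        */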
+       if (likely(req_id < rx_ring->ring_size))
+               return 0;
+
+       RTE_LOG(ERR, PMD, "Invalid rx req_id: %hu\n", req_id);
+
+       rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
+       rx_ring->adapter->trigger_reset = true;
+
+       return -EFAULT;
+}
+
 static void ena_config_host_info(struct ena_com_dev *ena_dev)
 {
        struct ena_admin_host_info *host_info;
@@ -470,6 +487,67 @@ static void ena_close(struct rte_eth_dev *dev)
        ena_tx_queue_release_all(dev);
 }
 
+static int
+ena_dev_reset(struct rte_eth_dev *dev)
+{
+       struct rte_mempool *mb_pool_rx[ENA_MAX_NUM_QUEUES];
+       struct rte_eth_dev *eth_dev;
+       struct rte_pci_device *pci_dev;
+       struct rte_intr_handle *intr_handle;
+       struct ena_com_dev *ena_dev;
+       struct ena_com_dev_get_features_ctx get_feat_ctx;
+       struct ena_adapter *adapter;
+       int nb_queues;
+       int rc, i;
+       bool wd_state;
+
+       adapter = (struct ena_adapter *)(dev->data->dev_private);
+       ena_dev = &adapter->ena_dev;
+       eth_dev = adapter->rte_dev;
+       pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       intr_handle = &pci_dev->intr_handle;
+       nb_queues = eth_dev->data->nb_rx_queues;
+
+       ena_com_set_admin_running_state(ena_dev, false);
+
+       ena_com_dev_reset(ena_dev, adapter->reset_reason);
+
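+       /* Save each queue's mempool so the Rx rings can be rebuilt with
+        * the same pools after the device comes back up.
+        */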
+       for (i = 0; i < nb_queues; i++)
+               mb_pool_rx[i] = adapter->rx_ring[i].mb_pool;
+
+       ena_rx_queue_release_all(eth_dev);
+       ena_tx_queue_release_all(eth_dev);
+
+       rte_intr_disable(intr_handle);
+
+       ena_com_abort_admin_commands(ena_dev);
+       ena_com_wait_for_abort_completion(ena_dev);
+       ena_com_admin_destroy(ena_dev);
+       ena_com_mmio_reg_read_request_destroy(ena_dev);
+
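+       /* Re-initialize the device from scratch, as done during probe. */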
+       rc = ena_device_init(ena_dev, &get_feat_ctx, &wd_state);
+       if (rc) {
+               PMD_INIT_LOG(CRIT, "Cannot initialize device");
+               return rc;
+       }
+       adapter->wd_state = wd_state;
+
+       rte_intr_enable(intr_handle);
+       ena_com_set_admin_polling_mode(ena_dev, false);
+       ena_com_admin_aenq_enable(ena_dev);
+
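+       /* Re-create all Rx/Tx queues, reusing the mempools saved above. */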
+       for (i = 0; i < nb_queues; ++i)
+               ena_rx_queue_setup(eth_dev, i, adapter->rx_ring_size, 0, NULL,
+                       mb_pool_rx[i]);
+
+       for (i = 0; i < nb_queues; ++i)
+               ena_tx_queue_setup(eth_dev, i, adapter->tx_ring_size, 0, NULL);
+
+       adapter->trigger_reset = false;
+
+       return 0;
+}
+
 static int ena_rss_reta_update(struct rte_eth_dev *dev,
                               struct rte_eth_rss_reta_entry64 *reta_conf,
                               uint16_t reta_size)
@@ -659,6 +737,10 @@ static void ena_rx_queue_release(void *queue)
                rte_free(ring->rx_buffer_info);
        ring->rx_buffer_info = NULL;
 
+       if (ring->empty_rx_reqs)
+               rte_free(ring->empty_rx_reqs);
+       ring->empty_rx_reqs = NULL;
+
        ring->configured = 0;
 
        RTE_LOG(NOTICE, PMD, "RX Queue %d:%d released\n",
@@ -749,13 +831,18 @@ static int ena_queue_restart_all(struct rte_eth_dev *dev,
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
        struct ena_ring *queues = NULL;
+       int nb_queues;
        int i = 0;
        int rc = 0;
 
-       queues = (ring_type == ENA_RING_TYPE_RX) ?
-               adapter->rx_ring : adapter->tx_ring;
-
-       for (i = 0; i < adapter->num_queues; i++) {
+       if (ring_type == ENA_RING_TYPE_RX) {
+               queues = adapter->rx_ring;
+               nb_queues = dev->data->nb_rx_queues;
+       } else {
+               queues = adapter->tx_ring;
+               nb_queues = dev->data->nb_tx_queues;
+       }
+       for (i = 0; i < nb_queues; i++) {
                if (queues[i].configured) {
                        if (ring_type == ENA_RING_TYPE_RX) {
                                ena_assert_msg(
@@ -807,6 +894,7 @@ static int ena_check_valid_conf(struct ena_adapter *adapter)
 
 static int
 ena_calc_queue_size(struct ena_com_dev *ena_dev,
+                   u16 *max_tx_sgl_size,
                    struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
        uint32_t queue_size = ENA_DEFAULT_RING_SIZE;
@@ -829,6 +917,9 @@ ena_calc_queue_size(struct ena_com_dev *ena_dev,
                return -EFAULT;
        }
 
+       *max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
+               get_feat_ctx->max_queues.max_packet_tx_descs);
+
        return queue_size;
 }
 
@@ -915,6 +1006,7 @@ static int ena_start(struct rte_eth_dev *dev)
 {
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
+       uint64_t ticks;
        int rc = 0;
 
        rc = ena_check_valid_conf(adapter);
@@ -938,6 +1030,13 @@ static int ena_start(struct rte_eth_dev *dev)
 
        ena_stats_restart(dev);
 
+       adapter->timestamp_wd = rte_get_timer_cycles();
+       adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
+
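+       /* Arm a periodic 1 second timer that runs the watchdog checks. */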
+       ticks = rte_get_timer_hz();
+       rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
+                       ena_timer_wd_callback, adapter);
+
        adapter->state = ENA_ADAPTER_STATE_RUNNING;
 
        return 0;
@@ -948,6 +1047,8 @@ static void ena_stop(struct rte_eth_dev *dev)
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
 
+       rte_timer_stop_sync(&adapter->timer_wd);
+
        adapter->state = ENA_ADAPTER_STATE_STOPPED;
 }
 
@@ -1069,7 +1170,10 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
        for (i = 0; i < txq->ring_size; i++)
                txq->empty_tx_reqs[i] = i;
 
-       txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
+       if (tx_conf != NULL) {
+               txq->offloads =
+                       tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
+       }
 
        /* Store pointer to this queue in upper layer */
        txq->configured = 1;
@@ -1093,7 +1197,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
                (struct ena_adapter *)(dev->data->dev_private);
        struct ena_ring *rxq = NULL;
        uint16_t ena_qid = 0;
-       int rc = 0;
+       int i, rc = 0;
        struct ena_com_dev *ena_dev = &adapter->ena_dev;
 
        rxq = &adapter->rx_ring[queue_idx];
@@ -1159,6 +1263,19 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
                return -ENOMEM;
        }
 
+       rxq->empty_rx_reqs = rte_zmalloc("rxq->empty_rx_reqs",
+                                        sizeof(uint16_t) * nb_desc,
+                                        RTE_CACHE_LINE_SIZE);
+       if (!rxq->empty_rx_reqs) {
+               RTE_LOG(ERR, PMD, "failed to alloc mem for empty rx reqs\n");
+               rte_free(rxq->rx_buffer_info);
+               rxq->rx_buffer_info = NULL;
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < nb_desc; i++)
+               rxq->empty_rx_reqs[i] = i;
+
        /* Store pointer to this queue in upper layer */
        rxq->configured = 1;
        dev->data->rx_queues[queue_idx] = rxq;
@@ -1173,7 +1290,7 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
        uint16_t ring_size = rxq->ring_size;
        uint16_t ring_mask = ring_size - 1;
        uint16_t next_to_use = rxq->next_to_use;
-       uint16_t in_use;
+       uint16_t in_use, req_id;
        struct rte_mbuf **mbufs = &rxq->rx_buffer_info[0];
 
        if (unlikely(!count))
@@ -1201,12 +1318,14 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
                struct ena_com_buf ebuf;
 
                rte_prefetch0(mbufs[((next_to_use + 4) & ring_mask)]);
+
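+               /* Use the next spare req_id as this descriptor's tag; the
+                * completion path returns it to identify the mbuf.
+                */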
+               req_id = rxq->empty_rx_reqs[next_to_use_masked];
                /* prepare physical address for DMA transaction */
                ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
                ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;
                /* pass resource to device */
                rc = ena_com_add_single_rx_desc(rxq->ena_com_io_sq,
-                                               &ebuf, next_to_use_masked);
+                                               &ebuf, req_id);
                if (unlikely(rc)) {
                        rte_mempool_put_bulk(rxq->mb_pool, (void **)(&mbuf),
                                             count - i);
@@ -1229,7 +1348,8 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
 }
 
 static int ena_device_init(struct ena_com_dev *ena_dev,
-                          struct ena_com_dev_get_features_ctx *get_feat_ctx)
+                          struct ena_com_dev_get_features_ctx *get_feat_ctx,
+                          bool *wd_state)
 {
        uint32_t aenq_groups;
        int rc;
@@ -1291,7 +1411,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev,
        }
 
        aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
-                     BIT(ENA_ADMIN_NOTIFICATION);
+                     BIT(ENA_ADMIN_NOTIFICATION) |
+                     BIT(ENA_ADMIN_KEEP_ALIVE);
 
        aenq_groups &= get_feat_ctx->aenq.supported_groups;
        rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
@@ -1300,6 +1421,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev,
                goto err_admin_init;
        }
 
+       *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
+
        return 0;
 
 err_admin_init:
@@ -1321,6 +1444,48 @@ static void ena_interrupt_handler_rte(void *cb_arg)
                ena_com_aenq_intr_handler(ena_dev, adapter);
 }
 
+static void check_for_missing_keep_alive(struct ena_adapter *adapter)
+{
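+       /* The watchdog runs only if the device negotiated keep-alive AENQ. */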
+       if (!adapter->wd_state)
+               return;
+
+       if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+               return;
+
+       if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >=
+           adapter->keep_alive_timeout)) {
+               RTE_LOG(ERR, PMD, "Keep alive timeout\n");
+               adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
+               adapter->trigger_reset = true;
+       }
+}
+
+/* Check if admin queue is enabled */
+static void check_for_admin_com_state(struct ena_adapter *adapter)
+{
+       if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) {
+               RTE_LOG(ERR, PMD, "ENA admin queue is not in running state!\n");
+               adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
+               adapter->trigger_reset = true;
+       }
+}
+
+static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer,
+                                 void *arg)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)arg;
+       struct rte_eth_dev *dev = adapter->rte_dev;
+
+       check_for_missing_keep_alive(adapter);
+       check_for_admin_com_state(adapter);
+
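+       /* Notify the application so it can recover the port with
+        * rte_eth_dev_reset().
+        */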
+       if (unlikely(adapter->trigger_reset)) {
+               RTE_LOG(ERR, PMD, "Trigger reset is on\n");
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET,
+                       NULL);
+       }
+}
+
 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
 {
        struct rte_pci_device *pci_dev;
@@ -1330,8 +1495,10 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
        struct ena_com_dev *ena_dev = &adapter->ena_dev;
        struct ena_com_dev_get_features_ctx get_feat_ctx;
        int queue_size, rc;
+       u16 tx_sgl_size = 0;
 
        static int adapters_found;
+       bool wd_state;
 
        memset(adapter, 0, sizeof(struct ena_adapter));
        ena_dev = &adapter->ena_dev;
@@ -1375,22 +1542,25 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
                 adapter->id_number);
 
        /* device specific initialization routine */
-       rc = ena_device_init(ena_dev, &get_feat_ctx);
+       rc = ena_device_init(ena_dev, &get_feat_ctx, &wd_state);
        if (rc) {
                PMD_INIT_LOG(CRIT, "Failed to init ENA device");
                return -1;
        }
+       adapter->wd_state = wd_state;
 
        ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
        adapter->num_queues = get_feat_ctx.max_queues.max_sq_num;
 
-       queue_size = ena_calc_queue_size(ena_dev, &get_feat_ctx);
+       queue_size = ena_calc_queue_size(ena_dev, &tx_sgl_size, &get_feat_ctx);
        if ((queue_size <= 0) || (adapter->num_queues <= 0))
                return -EFAULT;
 
        adapter->tx_ring_size = queue_size;
        adapter->rx_ring_size = queue_size;
 
+       adapter->max_tx_sgl_size = tx_sgl_size;
+
        /* prepare ring structures */
        ena_init_rings(adapter);
 
@@ -1423,6 +1593,10 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
        ena_com_set_admin_polling_mode(ena_dev, false);
        ena_com_admin_aenq_enable(ena_dev);
 
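+       /* Initialize the rte_timer subsystem once per process, then the
+        * per-adapter watchdog timer.
+        */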
+       if (adapters_found == 0)
+               rte_timer_subsystem_init();
+       rte_timer_init(&adapter->timer_wd);
+
        adapters_found++;
        adapter->state = ENA_ADAPTER_STATE_INIT;
 
@@ -1485,6 +1659,7 @@ static void ena_init_rings(struct ena_adapter *adapter)
                ring->id = i;
                ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type;
                ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size;
+               ring->sgl_size = adapter->max_tx_sgl_size;
        }
 
        for (i = 0; i < adapter->num_queues; i++) {
@@ -1575,6 +1750,7 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        unsigned int ring_mask = ring_size - 1;
        uint16_t next_to_clean = rx_ring->next_to_clean;
        uint16_t desc_in_use = 0;
+       uint16_t req_id;
        unsigned int recv_idx = 0;
        struct rte_mbuf *mbuf = NULL;
        struct rte_mbuf *mbuf_head = NULL;
@@ -1615,7 +1791,12 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        break;
 
                while (segments < ena_rx_ctx.descs) {
-                       mbuf = rx_buff_info[next_to_clean & ring_mask];
+                       req_id = ena_rx_ctx.ena_bufs[segments].req_id;
+                       rc = validate_rx_req_id(rx_ring, req_id);
+                       if (unlikely(rc))
+                               break;
+
+                       mbuf = rx_buff_info[req_id];
                        mbuf->data_len = ena_rx_ctx.ena_bufs[segments].len;
                        mbuf->data_off = RTE_PKTMBUF_HEADROOM;
                        mbuf->refcnt = 1;
@@ -1632,6 +1813,8 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        mbuf_head->pkt_len += mbuf->data_len;
 
                        mbuf_prev = mbuf;
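+                       /* Hand the req_id back to the free list so the
+                        * refill path can reuse it.
+                        */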
+                       rx_ring->empty_rx_reqs[next_to_clean & ring_mask] =
+                               req_id;
                        segments++;
                        next_to_clean++;
                }
@@ -1736,6 +1919,33 @@ static void ena_update_hints(struct ena_adapter *adapter,
                /* convert to usec */
                adapter->ena_dev.mmio_read.reg_read_to =
                        hints->mmio_read_timeout * 1000;
+
+       if (hints->driver_watchdog_timeout) {
+               if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+                       adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
+               else
+                       /* Convert msecs to ticks */
+                       adapter->keep_alive_timeout =
+                               (hints->driver_watchdog_timeout *
+                               rte_get_timer_hz()) / 1000;
+       }
+}
+
+static int ena_check_and_linearize_mbuf(struct ena_ring *tx_ring,
+                                       struct rte_mbuf *mbuf)
+{
+       int num_segments, rc;
+
+       num_segments = mbuf->nb_segs;
+
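+       /* A packet with more segments than the device SGL supports has to
+        * be coalesced into a single contiguous mbuf before transmission.
+        */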
+       if (likely(num_segments < tx_ring->sgl_size))
+               return 0;
+
+       rc = rte_pktmbuf_linearize(mbuf);
+       if (unlikely(rc))
+               RTE_LOG(WARNING, PMD, "Mbuf linearize failed\n");
+
+       return rc;
 }
 
 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -1768,6 +1978,10 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) {
                mbuf = tx_pkts[sent_idx];
 
+               rc = ena_check_and_linearize_mbuf(tx_ring, mbuf);
+               if (unlikely(rc))
+                       break;
+
                req_id = tx_ring->empty_tx_reqs[next_to_use & ring_mask];
                tx_info = &tx_ring->tx_buffer_info[req_id];
                tx_info->mbuf = mbuf;
@@ -1955,6 +2169,14 @@ static void ena_notification(void *data,
        }
 }
 
+static void ena_keep_alive(void *adapter_data,
+                          __rte_unused struct ena_admin_aenq_entry *aenq_e)
+{
+       struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
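+       /* Each keep-alive AENQ event from the device refreshes the
+        * watchdog timestamp.
+        */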
+       adapter->timestamp_wd = rte_get_timer_cycles();
+}
+
 /**
 * This handler will be called for unknown event groups or unimplemented handlers
  **/
@@ -1968,7 +2190,7 @@ static struct ena_aenq_handlers aenq_handlers = {
        .handlers = {
                [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
                [ENA_ADMIN_NOTIFICATION] = ena_notification,
-               [ENA_ADMIN_KEEP_ALIVE] = unimplemented_aenq_handler
+               [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
        },
        .unimplemented_handler = unimplemented_aenq_handler
 };