drivers/net/sfc/sfc_tx.c

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  *
   3  * Copyright (c) 2016-2018 Solarflare Communications Inc.
   4  * All rights reserved.
   5  *
   6  * This software was jointly developed between OKTET Labs (under contract
   7  * for Solarflare) and Solarflare Communications, Inc.
   8  */
   9
  10 #include "sfc.h"
  11 #include "sfc_debug.h"
  12 #include "sfc_log.h"
  13 #include "sfc_ev.h"
  14 #include "sfc_tx.h"
  15 #include "sfc_tweak.h"
  16 #include "sfc_kvargs.h"
  17
  18 /*
  19  * Maximum number of TX queue flush attempts in case of
  20  * failure or flush timeout
  21  */
  22 #define SFC_TX_QFLUSH_ATTEMPTS          (3)
  23
  24 /*
  25  * Time to wait between event queue polling attempts when waiting for TX
  26  * queue flush done or flush failed events
  27  */
  28 #define SFC_TX_QFLUSH_POLL_WAIT_MS      (1)
  29
  30 /*
  31  * Maximum number of event queue polling attempts when waiting for TX queue
  32  * flush done or flush failed events; it defines TX queue flush attempt timeout
  33  * together with SFC_TX_QFLUSH_POLL_WAIT_MS
  34  */
  35 #define SFC_TX_QFLUSH_POLL_ATTEMPTS     (2000)
  36
  37 uint64_t
  38 sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa)
  39 {
  40         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
  41         uint64_t caps = 0;
  42
  43         if ((sa->priv.dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) &&
  44             encp->enc_hw_tx_insert_vlan_enabled)
  45                 caps |= DEV_TX_OFFLOAD_VLAN_INSERT;
  46
  47         if (sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)
  48                 caps |= DEV_TX_OFFLOAD_MULTI_SEGS;
  49
  50         if ((~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) &&
  51             (~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_REFCNT))
  52                 caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
  53
  54         return caps;
  55 }
  56
  57 uint64_t
  58 sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa)
  59 {
  60         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
  61         uint64_t caps = 0;
  62
  63         caps |= DEV_TX_OFFLOAD_IPV4_CKSUM;
  64         caps |= DEV_TX_OFFLOAD_UDP_CKSUM;
  65         caps |= DEV_TX_OFFLOAD_TCP_CKSUM;
  66
  67         if (encp->enc_tunnel_encapsulations_supported)
  68                 caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
  69
  70         if (sa->tso)
  71                 caps |= DEV_TX_OFFLOAD_TCP_TSO;
  72
  73         return caps;
  74 }
  75
  76 static int
  77 sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
  78                    const struct rte_eth_txconf *tx_conf,
  79                    uint64_t offloads)
  80 {
  81         int rc = 0;
  82
  83         if (tx_conf->tx_rs_thresh != 0) {
  84                 sfc_err(sa, "RS bit in transmit descriptor is not supported");
  85                 rc = EINVAL;
  86         }
  87
  88         if (tx_conf->tx_free_thresh > txq_max_fill_level) {
  89                 sfc_err(sa,
  90                         "TxQ free threshold too large: %u vs maximum %u",
  91                         tx_conf->tx_free_thresh, txq_max_fill_level);
  92                 rc = EINVAL;
  93         }
  94
  95         if (tx_conf->tx_thresh.pthresh != 0 ||
  96             tx_conf->tx_thresh.hthresh != 0 ||
  97             tx_conf->tx_thresh.wthresh != 0) {
  98                 sfc_warn(sa,
  99                         "prefetch/host/writeback thresholds are not supported");
 100         }
 101
 102         /* We either perform both TCP and UDP offload, or no offload at all */
 103         if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) !=
 104             ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) {
 105                 sfc_err(sa, "TCP and UDP offloads can't be set independently");
 106                 rc = EINVAL;
 107         }
 108
 109         return rc;
 110 }
 111
 112 void
 113 sfc_tx_qflush_done(struct sfc_txq_info *txq_info)
 114 {
 115         txq_info->state |= SFC_TXQ_FLUSHED;
 116         txq_info->state &= ~SFC_TXQ_FLUSHING;
 117 }
 118
 119 int
 120 sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
 121              uint16_t nb_tx_desc, unsigned int socket_id,
 122              const struct rte_eth_txconf *tx_conf)
 123 {
 124         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
 125         unsigned int txq_entries;
 126         unsigned int evq_entries;
 127         unsigned int txq_max_fill_level;
 128         struct sfc_txq_info *txq_info;
 129         struct sfc_evq *evq;
 130         struct sfc_txq *txq;
 131         int rc = 0;
 132         struct sfc_dp_tx_qcreate_info info;
 133         uint64_t offloads;
 134         struct sfc_dp_tx_hw_limits hw_limits;
 135
 136         sfc_log_init(sa, "TxQ = %u", sw_index);
 137
 138         memset(&hw_limits, 0, sizeof(hw_limits));
 139         hw_limits.txq_max_entries = sa->txq_max_entries;
 140         hw_limits.txq_min_entries = sa->txq_min_entries;
 141
 142         rc = sa->priv.dp_tx->qsize_up_rings(nb_tx_desc, &hw_limits,
 143                                             &txq_entries, &evq_entries,
 144                                             &txq_max_fill_level);
 145         if (rc != 0)
 146                 goto fail_size_up_rings;
 147         SFC_ASSERT(txq_entries >= sa->txq_min_entries);
 148         SFC_ASSERT(txq_entries <= sa->txq_max_entries);
 149         SFC_ASSERT(txq_entries >= nb_tx_desc);
 150         SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);
 151
 152         offloads = tx_conf->offloads |
 153                 sa->eth_dev->data->dev_conf.txmode.offloads;
 154         rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads);
 155         if (rc != 0)
 156                 goto fail_bad_conf;
 157
 158         SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
 159         txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];
 160
 161         txq_info->entries = txq_entries;
 162
 163         rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
 164                           evq_entries, socket_id, &evq);
 165         if (rc != 0)
 166                 goto fail_ev_qinit;
 167
 168         txq = &sa->txq_ctrl[sw_index];
 169         txq->hw_index = sw_index;
 170         txq->evq = evq;
 171         txq_info->free_thresh =
 172                 (tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
 173                 SFC_TX_DEFAULT_FREE_THRESH;
 174         txq_info->offloads = offloads;
 175
 176         rc = sfc_dma_alloc(sa, "txq", sw_index,
 177                            efx_txq_size(sa->nic, txq_info->entries),
 178                            socket_id, &txq->mem);
 179         if (rc != 0)
 180                 goto fail_dma_alloc;
 181
 182         memset(&info, 0, sizeof(info));
 183         info.max_fill_level = txq_max_fill_level;
 184         info.free_thresh = txq_info->free_thresh;
 185         info.offloads = offloads;
 186         info.txq_entries = txq_info->entries;
 187         info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
 188         info.txq_hw_ring = txq->mem.esm_base;
 189         info.evq_entries = evq_entries;
 190         info.evq_hw_ring = evq->mem.esm_base;
 191         info.hw_index = txq->hw_index;
 192         info.mem_bar = sa->mem_bar.esb_base;
 193         info.vi_window_shift = encp->enc_vi_window_shift;
 194         info.tso_tcp_header_offset_limit =
 195                 encp->enc_tx_tso_tcp_header_offset_limit;
 196
 197         rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
 198                                      &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
 199                                      socket_id, &info, &txq_info->dp);
 200         if (rc != 0)
 201                 goto fail_dp_tx_qinit;
 202
 203         evq->dp_txq = txq_info->dp;
 204
 205         txq_info->state = SFC_TXQ_INITIALIZED;
 206
 207         txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);
 208
 209         return 0;
 210
 211 fail_dp_tx_qinit:
 212         sfc_dma_free(sa, &txq->mem);
 213
 214 fail_dma_alloc:
 215         sfc_ev_qfini(evq);
 216
 217 fail_ev_qinit:
 218         txq_info->entries = 0;
 219
 220 fail_bad_conf:
 221 fail_size_up_rings:
 222         sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
 223         return rc;
 224 }
 225
 226 void
 227 sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
 228 {
 229         struct sfc_txq_info *txq_info;
 230         struct sfc_txq *txq;
 231
 232         sfc_log_init(sa, "TxQ = %u", sw_index);
 233
 234         SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
 235         sa->eth_dev->data->tx_queues[sw_index] = NULL;
 236
 237         txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];
 238
 239         SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);
 240
 241         sa->priv.dp_tx->qdestroy(txq_info->dp);
 242         txq_info->dp = NULL;
 243
 244         txq_info->state &= ~SFC_TXQ_INITIALIZED;
 245         txq_info->entries = 0;
 246
 247         txq = &sa->txq_ctrl[sw_index];
 248
 249         sfc_dma_free(sa, &txq->mem);
 250
 251         sfc_ev_qfini(txq->evq);
 252         txq->evq = NULL;
 253 }
 254
 255 static int
 256 sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
 257 {
 258         sfc_log_init(sa, "TxQ = %u", sw_index);
 259
 260         return 0;
 261 }
 262
 263 static int
 264 sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
 265 {
 266         int rc = 0;
 267
 268         switch (txmode->mq_mode) {
 269         case ETH_MQ_TX_NONE:
 270                 break;
 271         default:
 272                 sfc_err(sa, "Tx multi-queue mode %u not supported",
 273                         txmode->mq_mode);
 274                 rc = EINVAL;
 275         }
 276
 277         /*
 278          * These features are claimed to be i40e-specific,
 279          * but it does make sense to double-check their absence
 280          */
 281         if (txmode->hw_vlan_reject_tagged) {
 282                 sfc_err(sa, "Rejecting tagged packets not supported");
 283                 rc = EINVAL;
 284         }
 285
 286         if (txmode->hw_vlan_reject_untagged) {
 287                 sfc_err(sa, "Rejecting untagged packets not supported");
 288                 rc = EINVAL;
 289         }
 290
 291         if (txmode->hw_vlan_insert_pvid) {
 292                 sfc_err(sa, "Port-based VLAN insertion not supported");
 293                 rc = EINVAL;
 294         }
 295
 296         return rc;
 297 }
 298
 299 /**
 300  * Destroy excess queues that are no longer needed after reconfiguration
 301  * or complete close.
 302  */
 303 static void
 304 sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
 305 {
 306         struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
 307         int sw_index;
 308
 309         SFC_ASSERT(nb_tx_queues <= sas->txq_count);
 310
 311         sw_index = sas->txq_count;
 312         while (--sw_index >= (int)nb_tx_queues) {
 313                 if (sas->txq_info[sw_index].state & SFC_TXQ_INITIALIZED)
 314                         sfc_tx_qfini(sa, sw_index);
 315         }
 316
 317         sas->txq_count = nb_tx_queues;
 318 }
 319
 320 int
 321 sfc_tx_configure(struct sfc_adapter *sa)
 322 {
 323         struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
 324         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
 325         const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
 326         const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
 327         int rc = 0;
 328
 329         sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
 330                      nb_tx_queues, sas->txq_count);
 331
 332         /*
 333          * The datapath implementation assumes absence of boundary
 334          * limits on Tx DMA descriptors. Addition of these checks on
 335          * datapath would simply make the datapath slower.
 336          */
 337         if (encp->enc_tx_dma_desc_boundary != 0) {
 338                 rc = ENOTSUP;
 339                 goto fail_tx_dma_desc_boundary;
 340         }
 341
 342         rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
 343         if (rc != 0)
 344                 goto fail_check_mode;
 345
 346         if (nb_tx_queues == sas->txq_count)
 347                 goto done;
 348
 349         if (sas->txq_info == NULL) {
 350                 sas->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
 351                                                   sizeof(sas->txq_info[0]), 0,
 352                                                   sa->socket_id);
 353                 if (sas->txq_info == NULL)
 354                         goto fail_txqs_alloc;
 355
 356                 /*
 357                  * Allocate primary process only TxQ control from heap
 358                  * since it should not be shared.
 359                  */
 360                 rc = ENOMEM;
 361                 sa->txq_ctrl = calloc(nb_tx_queues, sizeof(sa->txq_ctrl[0]));
 362                 if (sa->txq_ctrl == NULL)
 363                         goto fail_txqs_ctrl_alloc;
 364         } else {
 365                 struct sfc_txq_info *new_txq_info;
 366                 struct sfc_txq *new_txq_ctrl;
 367
 368                 if (nb_tx_queues < sas->txq_count)
 369                         sfc_tx_fini_queues(sa, nb_tx_queues);
 370
 371                 new_txq_info =
 372                         rte_realloc(sas->txq_info,
 373                                     nb_tx_queues * sizeof(sas->txq_info[0]), 0);
 374                 if (new_txq_info == NULL && nb_tx_queues > 0)
 375                         goto fail_txqs_realloc;
 376
 377                 new_txq_ctrl = realloc(sa->txq_ctrl,
 378                                        nb_tx_queues * sizeof(sa->txq_ctrl[0]));
 379                 if (new_txq_ctrl == NULL && nb_tx_queues > 0)
 380                         goto fail_txqs_ctrl_realloc;
 381
 382                 sas->txq_info = new_txq_info;
 383                 sa->txq_ctrl = new_txq_ctrl;
 384                 if (nb_tx_queues > sas->txq_count) {
 385                         memset(&sas->txq_info[sas->txq_count], 0,
 386                                (nb_tx_queues - sas->txq_count) *
 387                                sizeof(sas->txq_info[0]));
 388                         memset(&sa->txq_ctrl[sas->txq_count], 0,
 389                                (nb_tx_queues - sas->txq_count) *
 390                                sizeof(sa->txq_ctrl[0]));
 391                 }
 392         }
 393
 394         while (sas->txq_count < nb_tx_queues) {
 395                 rc = sfc_tx_qinit_info(sa, sas->txq_count);
 396                 if (rc != 0)
 397                         goto fail_tx_qinit_info;
 398
 399                 sas->txq_count++;
 400         }
 401
 402 done:
 403         return 0;
 404
 405 fail_tx_qinit_info:
 406 fail_txqs_ctrl_realloc:
 407 fail_txqs_realloc:
 408 fail_txqs_ctrl_alloc:
 409 fail_txqs_alloc:
 410         sfc_tx_close(sa);
 411
 412 fail_check_mode:
 413 fail_tx_dma_desc_boundary:
 414         sfc_log_init(sa, "failed (rc = %d)", rc);
 415         return rc;
 416 }
 417
 418 void
 419 sfc_tx_close(struct sfc_adapter *sa)
 420 {
 421         sfc_tx_fini_queues(sa, 0);
 422
 423         free(sa->txq_ctrl);
 424         sa->txq_ctrl = NULL;
 425
 426         rte_free(sfc_sa2shared(sa)->txq_info);
 427         sfc_sa2shared(sa)->txq_info = NULL;
 428 }
 429
 430 int
 431 sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
 432 {
 433         struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
 434         uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
 435                                       sfc_tx_get_queue_offload_caps(sa);
 436         struct rte_eth_dev_data *dev_data;
 437         struct sfc_txq_info *txq_info;
 438         struct sfc_txq *txq;
 439         struct sfc_evq *evq;
 440         uint16_t flags = 0;
 441         unsigned int desc_index;
 442         int rc = 0;
 443
 444         sfc_log_init(sa, "TxQ = %u", sw_index);
 445
 446         SFC_ASSERT(sw_index < sas->txq_count);
 447         txq_info = &sas->txq_info[sw_index];
 448
 449         SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);
 450
 451         txq = &sa->txq_ctrl[sw_index];
 452         evq = txq->evq;
 453
 454         rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
 455         if (rc != 0)
 456                 goto fail_ev_qstart;
 457
 458         if (txq_info->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
 459                 flags |= EFX_TXQ_CKSUM_IPV4;
 460
 461         if (txq_info->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
 462                 flags |= EFX_TXQ_CKSUM_INNER_IPV4;
 463
 464         if ((txq_info->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ||
 465             (txq_info->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
 466                 flags |= EFX_TXQ_CKSUM_TCPUDP;
 467
 468                 if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
 469                         flags |= EFX_TXQ_CKSUM_INNER_TCPUDP;
 470         }
 471
 472         if (txq_info->offloads & DEV_TX_OFFLOAD_TCP_TSO)
 473                 flags |= EFX_TXQ_FATSOV2;
 474
 475         rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem,
 476                             txq_info->entries, 0 /* not used on EF10 */,
 477                             flags, evq->common,
 478                             &txq->common, &desc_index);
 479         if (rc != 0) {
 480                 if (sa->tso && (rc == ENOSPC))
 481                         sfc_err(sa, "ran out of TSO contexts");
 482
 483                 goto fail_tx_qcreate;
 484         }
 485
 486         efx_tx_qenable(txq->common);
 487
 488         txq_info->state |= SFC_TXQ_STARTED;
 489
 490         rc = sa->priv.dp_tx->qstart(txq_info->dp, evq->read_ptr, desc_index);
 491         if (rc != 0)
 492                 goto fail_dp_qstart;
 493
 494         /*
 495          * It seems to be used by DPDK for debug purposes only ('rte_ether')
 496          */
 497         dev_data = sa->eth_dev->data;
 498         dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;
 499
 500         return 0;
 501
 502 fail_dp_qstart:
 503         txq_info->state = SFC_TXQ_INITIALIZED;
 504         efx_tx_qdestroy(txq->common);
 505
 506 fail_tx_qcreate:
 507         sfc_ev_qstop(evq);
 508
 509 fail_ev_qstart:
 510         return rc;
 511 }
 512
 513 void
 514 sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
 515 {
 516         struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
 517         struct rte_eth_dev_data *dev_data;
 518         struct sfc_txq_info *txq_info;
 519         struct sfc_txq *txq;
 520         unsigned int retry_count;
 521         unsigned int wait_count;
 522         int rc;
 523
 524         sfc_log_init(sa, "TxQ = %u", sw_index);
 525
 526         SFC_ASSERT(sw_index < sas->txq_count);
 527         txq_info = &sas->txq_info[sw_index];
 528
 529         if (txq_info->state == SFC_TXQ_INITIALIZED)
 530                 return;
 531
 532         SFC_ASSERT(txq_info->state & SFC_TXQ_STARTED);
 533
 534         txq = &sa->txq_ctrl[sw_index];
 535         sa->priv.dp_tx->qstop(txq_info->dp, &txq->evq->read_ptr);
 536
 537         /*
 538          * Retry TX queue flushing in case of flush failed or
 539          * timeout; in the worst case it can delay for 6 seconds
 540          */
 541         for (retry_count = 0;
 542              ((txq_info->state & SFC_TXQ_FLUSHED) == 0) &&
 543              (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
 544              ++retry_count) {
 545                 rc = efx_tx_qflush(txq->common);
 546                 if (rc != 0) {
 547                         txq_info->state |= (rc == EALREADY) ?
 548                                 SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
 549                         break;
 550                 }
 551
 552                 /*
 553                  * Wait for TX queue flush done or flush failed event at least
 554                  * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
 555                  * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
 556                  * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
 557                  */
 558                 wait_count = 0;
 559                 do {
 560                         rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
 561                         sfc_ev_qpoll(txq->evq);
 562                 } while ((txq_info->state & SFC_TXQ_FLUSHING) &&
 563                          wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);
 564
 565                 if (txq_info->state & SFC_TXQ_FLUSHING)
 566                         sfc_err(sa, "TxQ %u flush timed out", sw_index);
 567
 568                 if (txq_info->state & SFC_TXQ_FLUSHED)
 569                         sfc_notice(sa, "TxQ %u flushed", sw_index);
 570         }
 571
 572         sa->priv.dp_tx->qreap(txq_info->dp);
 573
 574         txq_info->state = SFC_TXQ_INITIALIZED;
 575
 576         efx_tx_qdestroy(txq->common);
 577
 578         sfc_ev_qstop(txq->evq);
 579
 580         /*
 581          * It seems to be used by DPDK for debug purposes only ('rte_ether')
 582          */
 583         dev_data = sa->eth_dev->data;
 584         dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
 585 }
 586
 587 int
 588 sfc_tx_start(struct sfc_adapter *sa)
 589 {
 590         struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
 591         unsigned int sw_index;
 592         int rc = 0;
 593
 594         sfc_log_init(sa, "txq_count = %u", sas->txq_count);
 595
 596         if (sa->tso) {
 597                 if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
 598                         sfc_warn(sa, "TSO support was unable to be restored");
 599                         sa->tso = B_FALSE;
 600                 }
 601         }
 602
 603         rc = efx_tx_init(sa->nic);
 604         if (rc != 0)
 605                 goto fail_efx_tx_init;
 606
 607         for (sw_index = 0; sw_index < sas->txq_count; ++sw_index) {
 608                 if (sas->txq_info[sw_index].state == SFC_TXQ_INITIALIZED &&
 609                     (!(sas->txq_info[sw_index].deferred_start) ||
 610                      sas->txq_info[sw_index].deferred_started)) {
 611                         rc = sfc_tx_qstart(sa, sw_index);
 612                         if (rc != 0)
 613                                 goto fail_tx_qstart;
 614                 }
 615         }
 616
 617         return 0;
 618
 619 fail_tx_qstart:
 620         while (sw_index-- > 0)
 621                 sfc_tx_qstop(sa, sw_index);
 622
 623         efx_tx_fini(sa->nic);
 624
 625 fail_efx_tx_init:
 626         sfc_log_init(sa, "failed (rc = %d)", rc);
 627         return rc;
 628 }
 629
 630 void
 631 sfc_tx_stop(struct sfc_adapter *sa)
 632 {
 633         struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
 634         unsigned int sw_index;
 635
 636         sfc_log_init(sa, "txq_count = %u", sas->txq_count);
 637
 638         sw_index = sas->txq_count;
 639         while (sw_index-- > 0) {
 640                 if (sas->txq_info[sw_index].state & SFC_TXQ_STARTED)
 641                         sfc_tx_qstop(sa, sw_index);
 642         }
 643
 644         efx_tx_fini(sa->nic);
 645 }
 646
 647 static void
 648 sfc_efx_tx_reap(struct sfc_efx_txq *txq)
 649 {
 650         unsigned int completed;
 651
 652         sfc_ev_qpoll(txq->evq);
 653
 654         for (completed = txq->completed;
 655              completed != txq->pending; completed++) {
 656                 struct sfc_efx_tx_sw_desc *txd;
 657
 658                 txd = &txq->sw_ring[completed & txq->ptr_mask];
 659
 660                 if (txd->mbuf != NULL) {
 661                         rte_pktmbuf_free(txd->mbuf);
 662                         txd->mbuf = NULL;
 663                 }
 664         }
 665
 666         txq->completed = completed;
 667 }
 668
 669 /*
 670  * The function is used to insert or update VLAN tag;
 671  * the firmware has state of the firmware tag to insert per TxQ
 672  * (controlled by option descriptors), hence, if the tag of the
 673  * packet to be sent is different from one remembered by the firmware,
 674  * the function will update it
 675  */
 676 static unsigned int
 677 sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
 678                             efx_desc_t **pend)
 679 {
 680         uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
 681                              m->vlan_tci : 0);
 682
 683         if (this_tag == txq->hw_vlan_tci)
 684                 return 0;
 685
 686         /*
 687          * The expression inside SFC_ASSERT() is not desired to be checked in
 688          * a non-debug build because it might be too expensive on the data path
 689          */
 690         SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);
 691
 692         efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
 693                                     *pend);
 694         (*pend)++;
 695         txq->hw_vlan_tci = this_tag;
 696
 697         return 1;
 698 }
 699
 700 static uint16_t
 701 sfc_efx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 702                      uint16_t nb_pkts)
 703 {
 704         struct sfc_dp_txq *dp_txq = tx_queue;
 705         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
 706         const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);
 707         uint16_t i;
 708
 709         for (i = 0; i < nb_pkts; i++) {
 710                 int ret;
 711
 712                 /*
 713                  * EFX Tx datapath may require extra VLAN descriptor if VLAN
 714                  * insertion offload is requested regardless the offload
 715                  * requested/supported.
 716                  */
 717                 ret = sfc_dp_tx_prepare_pkt(tx_pkts[i],
 718                                 encp->enc_tx_tso_tcp_header_offset_limit,
 719                                 txq->max_fill_level, EFX_TX_FATSOV2_OPT_NDESCS,
 720                                 1);
 721                 if (unlikely(ret != 0)) {
 722                         rte_errno = ret;
 723                         break;
 724                 }
 725         }
 726
 727         return i;
 728 }
 729
 730 static uint16_t
 731 sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 732 {
 733         struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
 734         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
 735         unsigned int added = txq->added;
 736         unsigned int pushed = added;
 737         unsigned int pkts_sent = 0;
 738         efx_desc_t *pend = &txq->pend_desc[0];
 739         const unsigned int hard_max_fill = txq->max_fill_level;
 740         const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
 741         unsigned int fill_level = added - txq->completed;
 742         boolean_t reap_done;
 743         int rc __rte_unused;
 744         struct rte_mbuf **pktp;
 745
 746         if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
 747                 goto done;
 748
 749         /*
 750          * If insufficient space for a single packet is present,
 751          * we should reap; otherwise, we shouldn't do that all the time
 752          * to avoid latency increase
 753          */
 754         reap_done = (fill_level > soft_max_fill);
 755
 756         if (reap_done) {
 757                 sfc_efx_tx_reap(txq);
 758                 /*
 759                  * Recalculate fill level since 'txq->completed'
 760                  * might have changed on reap
 761                  */
 762                 fill_level = added - txq->completed;
 763         }
 764
 765         for (pkts_sent = 0, pktp = &tx_pkts[0];
 766              (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
 767              pkts_sent++, pktp++) {
 768                 uint16_t                hw_vlan_tci_prev = txq->hw_vlan_tci;
 769                 struct rte_mbuf         *m_seg = *pktp;
 770                 size_t                  pkt_len = m_seg->pkt_len;
 771                 unsigned int            pkt_descs = 0;
 772                 size_t                  in_off = 0;
 773
 774                 /*
 775                  * Here VLAN TCI is expected to be zero in case if no
 776                  * DEV_TX_OFFLOAD_VLAN_INSERT capability is advertised;
 777                  * if the calling app ignores the absence of
 778                  * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then
 779                  * TX_ERROR will occur
 780                  */
 781                 pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);
 782
 783                 if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
 784                         /*
 785                          * We expect correct 'pkt->l[2, 3, 4]_len' values
 786                          * to be set correctly by the caller
 787                          */
 788                         if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
 789                                            &pkt_descs, &pkt_len) != 0) {
 790                                 /* We may have reached this place if packet
 791                                  * header linearization is needed but the
 792                                  * header length is greater than
 793                                  * SFC_TSOH_STD_LEN
 794                                  *
 795                                  * We will deceive RTE saying that we have sent
 796                                  * the packet, but we will actually drop it.
 797                                  * Hence, we should revert 'pend' to the
 798                                  * previous state (in case we have added
 799                                  * VLAN descriptor) and start processing
 800                                  * another one packet. But the original
 801                                  * mbuf shouldn't be orphaned
 802                                  */
 803                                 pend -= pkt_descs;
 804                                 txq->hw_vlan_tci = hw_vlan_tci_prev;
 805
 806                                 rte_pktmbuf_free(*pktp);
 807
 808                                 continue;
 809                         }
 810
 811                         /*
 812                          * We've only added 2 FATSOv2 option descriptors
 813                          * and 1 descriptor for the linearized packet header.
 814                          * The outstanding work will be done in the same manner
 815                          * as for the usual non-TSO path
 816                          */
 817                 }
 818
 819                 for (; m_seg != NULL; m_seg = m_seg->next) {
 820                         efsys_dma_addr_t        next_frag;
 821                         size_t                  seg_len;
 822
 823                         seg_len = m_seg->data_len;
 824                         next_frag = rte_mbuf_data_iova(m_seg);
 825
 826                         /*
 827                          * If we've started TSO transaction few steps earlier,
 828                          * we'll skip packet header using an offset in the
 829                          * current segment (which has been set to the
 830                          * first one containing payload)
 831                          */
 832                         seg_len -= in_off;
 833                         next_frag += in_off;
 834                         in_off = 0;
 835
 836                         do {
 837                                 efsys_dma_addr_t        frag_addr = next_frag;
 838                                 size_t                  frag_len;
 839
 840                                 /*
 841                                  * It is assumed here that there is no
 842                                  * limitation on address boundary
 843                                  * crossing by DMA descriptor.
 844                                  */
 845                                 frag_len = MIN(seg_len, txq->dma_desc_size_max);
 846                                 next_frag += frag_len;
 847                                 seg_len -= frag_len;
 848                                 pkt_len -= frag_len;
 849
 850                                 efx_tx_qdesc_dma_create(txq->common,
 851                                                         frag_addr, frag_len,
 852                                                         (pkt_len == 0),
 853                                                         pend++);
 854
 855                                 pkt_descs++;
 856                         } while (seg_len != 0);
 857                 }
 858
 859                 added += pkt_descs;
 860
 861                 fill_level += pkt_descs;
 862                 if (unlikely(fill_level > hard_max_fill)) {
 863                         /*
 864                          * Our estimation for maximum number of descriptors
 865                          * required to send a packet seems to be wrong.
 866                          * Try to reap (if we haven't yet).
 867                          */
 868                         if (!reap_done) {
 869                                 sfc_efx_tx_reap(txq);
 870                                 reap_done = B_TRUE;
 871                                 fill_level = added - txq->completed;
 872                                 if (fill_level > hard_max_fill) {
 873                                         pend -= pkt_descs;
 874                                         txq->hw_vlan_tci = hw_vlan_tci_prev;
 875                                         break;
 876                                 }
 877                         } else {
 878                                 pend -= pkt_descs;
 879                                 txq->hw_vlan_tci = hw_vlan_tci_prev;
 880                                 break;
 881                         }
 882                 }
 883
 884                 /* Assign mbuf to the last used desc */
 885                 txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
 886         }
 887
 888         if (likely(pkts_sent > 0)) {
 889                 rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
 890                                        pend - &txq->pend_desc[0],
 891                                        txq->completed, &txq->added);
 892                 SFC_ASSERT(rc == 0);
 893
 894                 if (likely(pushed != txq->added))
 895                         efx_tx_qpush(txq->common, txq->added, pushed);
 896         }
 897
 898 #if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
 899         if (!reap_done)
 900                 sfc_efx_tx_reap(txq);
 901 #endif
 902
 903 done:
 904         return pkts_sent;
 905 }
 906
 907 const struct sfc_dp_tx *
 908 sfc_dp_tx_by_dp_txq(const struct sfc_dp_txq *dp_txq)
 909 {
 910         const struct sfc_dp_queue *dpq = &dp_txq->dpq;
 911         struct rte_eth_dev *eth_dev;
 912         struct sfc_adapter_priv *sap;
 913
 914         SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
 915         eth_dev = &rte_eth_devices[dpq->port_id];
 916
 917         sap = sfc_adapter_priv_by_eth_dev(eth_dev);
 918
 919         return sap->dp_tx;
 920 }
 921
 922 struct sfc_txq_info *
 923 sfc_txq_info_by_dp_txq(const struct sfc_dp_txq *dp_txq)
 924 {
 925         const struct sfc_dp_queue *dpq = &dp_txq->dpq;
 926         struct rte_eth_dev *eth_dev;
 927         struct sfc_adapter_shared *sas;
 928
 929         SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
 930         eth_dev = &rte_eth_devices[dpq->port_id];
 931
 932         sas = sfc_adapter_shared_by_eth_dev(eth_dev);
 933
 934         SFC_ASSERT(dpq->queue_id < sas->txq_count);
 935         return &sas->txq_info[dpq->queue_id];
 936 }
 937
 938 struct sfc_txq *
 939 sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
 940 {
 941         const struct sfc_dp_queue *dpq = &dp_txq->dpq;
 942         struct rte_eth_dev *eth_dev;
 943         struct sfc_adapter *sa;
 944
 945         SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
 946         eth_dev = &rte_eth_devices[dpq->port_id];
 947
 948         sa = sfc_adapter_by_eth_dev(eth_dev);
 949
 950         SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->txq_count);
 951         return &sa->txq_ctrl[dpq->queue_id];
 952 }
 953
 954 static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
 955 static int
 956 sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
 957                           __rte_unused struct sfc_dp_tx_hw_limits *limits,
 958                           unsigned int *txq_entries,
 959                           unsigned int *evq_entries,
 960                           unsigned int *txq_max_fill_level)
 961 {
 962         *txq_entries = nb_tx_desc;
 963         *evq_entries = nb_tx_desc;
 964         *txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
 965         return 0;
 966 }
 967
 968 static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
 969 static int
 970 sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
 971                    const struct rte_pci_addr *pci_addr,
 972                    int socket_id,
 973                    const struct sfc_dp_tx_qcreate_info *info,
 974                    struct sfc_dp_txq **dp_txqp)
 975 {
 976         struct sfc_efx_txq *txq;
 977         struct sfc_txq *ctrl_txq;
 978         int rc;
 979
 980         rc = ENOMEM;
 981         txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
 982                                  RTE_CACHE_LINE_SIZE, socket_id);
 983         if (txq == NULL)
 984                 goto fail_txq_alloc;
 985
 986         sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);
 987
 988         rc = ENOMEM;
 989         txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
 990                                            EFX_TXQ_LIMIT(info->txq_entries),
 991                                            sizeof(*txq->pend_desc), 0,
 992                                            socket_id);
 993         if (txq->pend_desc == NULL)
 994                 goto fail_pend_desc_alloc;
 995
 996         rc = ENOMEM;
 997         txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
 998                                          info->txq_entries,
 999                                          sizeof(*txq->sw_ring),
1000                                          RTE_CACHE_LINE_SIZE, socket_id);
1001         if (txq->sw_ring == NULL)
1002                 goto fail_sw_ring_alloc;
1003
1004         ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
1005         if (ctrl_txq->evq->sa->tso) {
1006                 rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
1007                                                  info->txq_entries, socket_id);
1008                 if (rc != 0)
1009                         goto fail_alloc_tsoh_objs;
1010         }
1011
1012         txq->evq = ctrl_txq->evq;
1013         txq->ptr_mask = info->txq_entries - 1;
1014         txq->max_fill_level = info->max_fill_level;
1015         txq->free_thresh = info->free_thresh;
1016         txq->dma_desc_size_max = info->dma_desc_size_max;
1017
1018         *dp_txqp = &txq->dp;
1019         return 0;
1020
1021 fail_alloc_tsoh_objs:
1022         rte_free(txq->sw_ring);
1023
1024 fail_sw_ring_alloc:
1025         rte_free(txq->pend_desc);
1026
1027 fail_pend_desc_alloc:
1028         rte_free(txq);
1029
1030 fail_txq_alloc:
1031         return rc;
1032 }
1033
1034 static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
1035 static void
1036 sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
1037 {
1038         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
1039
1040         sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
1041         rte_free(txq->sw_ring);
1042         rte_free(txq->pend_desc);
1043         rte_free(txq);
1044 }
1045
1046 static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
1047 static int
1048 sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
1049                   __rte_unused unsigned int evq_read_ptr,
1050                   unsigned int txq_desc_index)
1051 {
1052         /* libefx-based datapath is specific to libefx-based PMD */
1053         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
1054         struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);
1055
1056         txq->common = ctrl_txq->common;
1057
1058         txq->pending = txq->completed = txq->added = txq_desc_index;
1059         txq->hw_vlan_tci = 0;
1060
1061         txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);
1062
1063         return 0;
1064 }
1065
1066 static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
1067 static void
1068 sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
1069                  __rte_unused unsigned int *evq_read_ptr)
1070 {
1071         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
1072
1073         txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
1074 }
1075
1076 static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
1077 static void
1078 sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
1079 {
1080         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
1081         unsigned int txds;
1082
1083         sfc_efx_tx_reap(txq);
1084
1085         for (txds = 0; txds <= txq->ptr_mask; txds++) {
1086                 if (txq->sw_ring[txds].mbuf != NULL) {
1087                         rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
1088                         txq->sw_ring[txds].mbuf = NULL;
1089                 }
1090         }
1091
1092         txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
1093 }
1094
1095 static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
1096 static int
1097 sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
1098 {
1099         struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
1100
1101         if (unlikely(offset > txq->ptr_mask))
1102                 return -EINVAL;
1103
1104         if (unlikely(offset >= txq->max_fill_level))
1105                 return RTE_ETH_TX_DESC_UNAVAIL;
1106
1107         /*
1108          * Poll EvQ to derive up-to-date 'txq->pending' figure;
1109          * it is required for the queue to be running, but the
1110          * check is omitted because API design assumes that it
1111          * is the duty of the caller to satisfy all conditions
1112          */
1113         SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
1114                    SFC_EFX_TXQ_FLAG_RUNNING);
1115         sfc_ev_qpoll(txq->evq);
1116
1117         /*
1118          * Ring tail is 'txq->pending', and although descriptors
1119          * between 'txq->completed' and 'txq->pending' are still
1120          * in use by the driver, they should be reported as DONE
1121          */
1122         if (unlikely(offset < (txq->added - txq->pending)))
1123                 return RTE_ETH_TX_DESC_FULL;
1124
1125         /*
1126          * There is no separate return value for unused descriptors;
1127          * the latter will be reported as DONE because genuine DONE
1128          * descriptors will be freed anyway in SW on the next burst
1129          */
1130         return RTE_ETH_TX_DESC_DONE;
1131 }
1132
1133 struct sfc_dp_tx sfc_efx_tx = {
1134         .dp = {
1135                 .name           = SFC_KVARG_DATAPATH_EFX,
1136                 .type           = SFC_DP_TX,
1137                 .hw_fw_caps     = 0,
1138         },
1139         .features               = SFC_DP_TX_FEAT_VLAN_INSERT |
1140                                   SFC_DP_TX_FEAT_TSO |
1141                                   SFC_DP_TX_FEAT_MULTI_POOL |
1142                                   SFC_DP_TX_FEAT_REFCNT |
1143                                   SFC_DP_TX_FEAT_MULTI_SEG,
1144         .qsize_up_rings         = sfc_efx_tx_qsize_up_rings,
1145         .qcreate                = sfc_efx_tx_qcreate,
1146         .qdestroy               = sfc_efx_tx_qdestroy,
1147         .qstart                 = sfc_efx_tx_qstart,
1148         .qstop                  = sfc_efx_tx_qstop,
1149         .qreap                  = sfc_efx_tx_qreap,
1150         .qdesc_status           = sfc_efx_tx_qdesc_status,
1151         .pkt_prepare            = sfc_efx_prepare_pkts,
1152         .pkt_burst              = sfc_efx_xmit_pkts,
1153 };