drivers/net/nfp/nfp_rxtx.c

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright (c) 2014-2021 Netronome Systems, Inc.
   3  * All rights reserved.
   4  *
   5  * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
   6  */
   7
   8 /*
   9  * vim:shiftwidth=8:noexpandtab
  10  *
  11  * @file dpdk/pmd/nfp_rxtx.c
  12  *
  13  * Netronome vNIC DPDK Poll-Mode Driver: Rx/Tx functions
  14  */
  15
  16 #include <ethdev_driver.h>
  17 #include <ethdev_pci.h>
  18
  19 #include "nfp_common.h"
  20 #include "nfp_rxtx.h"
  21 #include "nfp_logs.h"
  22 #include "nfp_ctrl.h"
  23
  24 /* Prototypes */
  25 static int nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq);
  26 static inline void nfp_net_mbuf_alloc_failed(struct nfp_net_rxq *rxq);
  27 static inline void nfp_net_set_hash(struct nfp_net_rxq *rxq,
  28                                     struct nfp_net_rx_desc *rxd,
  29                                     struct rte_mbuf *mbuf);
  30 static inline void nfp_net_rx_cksum(struct nfp_net_rxq *rxq,
  31                                     struct nfp_net_rx_desc *rxd,
  32                                     struct rte_mbuf *mb);
  33 static void nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq);
  34 static int nfp_net_tx_free_bufs(struct nfp_net_txq *txq);
  35 static void nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq);
  36 static inline uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq);
  37 static inline uint32_t nfp_net_txq_full(struct nfp_net_txq *txq);
  38 static inline void nfp_net_tx_tso(struct nfp_net_txq *txq,
  39                                   struct nfp_net_tx_desc *txd,
  40                                   struct rte_mbuf *mb);
  41 static inline void nfp_net_tx_cksum(struct nfp_net_txq *txq,
  42                                     struct nfp_net_tx_desc *txd,
  43                                     struct rte_mbuf *mb);
  44
  45 static int
  46 nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
  47 {
  48         struct nfp_net_rx_buff *rxe = rxq->rxbufs;
  49         uint64_t dma_addr;
  50         unsigned int i;
  51
  52         PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %u descriptors",
  53                    rxq->rx_count);
  54
  55         for (i = 0; i < rxq->rx_count; i++) {
  56                 struct nfp_net_rx_desc *rxd;
  57                 struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool);
  58
  59                 if (mbuf == NULL) {
  60                         PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
  61                                 (unsigned int)rxq->qidx);
  62                         return -ENOMEM;
  63                 }
  64
  65                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf));
  66
  67                 rxd = &rxq->rxds[i];
  68                 rxd->fld.dd = 0;
  69                 rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xff;
  70                 rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
  71                 rxe[i].mbuf = mbuf;
  72                 PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64, i, dma_addr);
  73         }
  74
  75         /* Make sure all writes are flushed before telling the hardware */
  76         rte_wmb();
  77
  78         /* Not advertising the whole ring as the firmware gets confused if so */
  79         PMD_RX_LOG(DEBUG, "Increment FL write pointer in %u",
  80                    rxq->rx_count - 1);
  81
  82         nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1);
  83
  84         return 0;
  85 }
  86
  87 int
  88 nfp_net_rx_freelist_setup(struct rte_eth_dev *dev)
  89 {
  90         int i;
  91
  92         for (i = 0; i < dev->data->nb_rx_queues; i++) {
  93                 if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) < 0)
  94                         return -1;
  95         }
  96         return 0;
  97 }
  98
  99 uint32_t
 100 nfp_net_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx)
 101 {
 102         struct nfp_net_rxq *rxq;
 103         struct nfp_net_rx_desc *rxds;
 104         uint32_t idx;
 105         uint32_t count;
 106
 107         rxq = (struct nfp_net_rxq *)dev->data->rx_queues[queue_idx];
 108
 109         idx = rxq->rd_p;
 110
 111         count = 0;
 112
 113         /*
 114          * Other PMDs are just checking the DD bit in intervals of 4
 115          * descriptors and counting all four if the first has the DD
 116          * bit on. Of course, this is not accurate but can be good for
 117          * performance. But ideally that should be done in descriptors
 118          * chunks belonging to the same cache line
 119          */
 120
 121         while (count < rxq->rx_count) {
 122                 rxds = &rxq->rxds[idx];
 123                 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
 124                         break;
 125
 126                 count++;
 127                 idx++;
 128
 129                 /* Wrapping? */
 130                 if ((idx) == rxq->rx_count)
 131                         idx = 0;
 132         }
 133
 134         return count;
 135 }
 136
 137 static inline void
 138 nfp_net_mbuf_alloc_failed(struct nfp_net_rxq *rxq)
 139 {
 140         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
 141 }
 142
 143 /*
 144  * nfp_net_set_hash - Set mbuf hash data
 145  *
 146  * The RSS hash and hash-type are pre-pended to the packet data.
 147  * Extract and decode it and set the mbuf fields.
 148  */
 149 static inline void
 150 nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
 151                  struct rte_mbuf *mbuf)
 152 {
 153         struct nfp_net_hw *hw = rxq->hw;
 154         uint8_t *meta_offset;
 155         uint32_t meta_info;
 156         uint32_t hash = 0;
 157         uint32_t hash_type = 0;
 158
 159         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
 160                 return;
 161
 162         /* this is true for new firmwares */
 163         if (likely(((hw->cap & NFP_NET_CFG_CTRL_RSS2) ||
 164             (NFD_CFG_MAJOR_VERSION_of(hw->ver) == 4)) &&
 165              NFP_DESC_META_LEN(rxd))) {
 166                 /*
 167                  * new metadata api:
 168                  * <----  32 bit  ----->
 169                  * m    field type word
 170                  * e     data field #2
 171                  * t     data field #1
 172                  * a     data field #0
 173                  * ====================
 174                  *    packet data
 175                  *
 176                  * Field type word contains up to 8 4bit field types
 177                  * A 4bit field type refers to a data field word
 178                  * A data field word can have several 4bit field types
 179                  */
 180                 meta_offset = rte_pktmbuf_mtod(mbuf, uint8_t *);
 181                 meta_offset -= NFP_DESC_META_LEN(rxd);
 182                 meta_info = rte_be_to_cpu_32(*(uint32_t *)meta_offset);
 183                 meta_offset += 4;
 184                 /* NFP PMD just supports metadata for hashing */
 185                 switch (meta_info & NFP_NET_META_FIELD_MASK) {
 186                 case NFP_NET_META_HASH:
 187                         /* next field type is about the hash type */
 188                         meta_info >>= NFP_NET_META_FIELD_SIZE;
 189                         /* hash value is in the data field */
 190                         hash = rte_be_to_cpu_32(*(uint32_t *)meta_offset);
 191                         hash_type = meta_info & NFP_NET_META_FIELD_MASK;
 192                         break;
 193                 default:
 194                         /* Unsupported metadata can be a performance issue */
 195                         return;
 196                 }
 197         } else {
 198                 if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
 199                         return;
 200
 201                 hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET);
 202                 hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET);
 203         }
 204
 205         mbuf->hash.rss = hash;
 206         mbuf->ol_flags |= PKT_RX_RSS_HASH;
 207
 208         switch (hash_type) {
 209         case NFP_NET_RSS_IPV4:
 210                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV4;
 211                 break;
 212         case NFP_NET_RSS_IPV6:
 213                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6;
 214                 break;
 215         case NFP_NET_RSS_IPV6_EX:
 216                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
 217                 break;
 218         case NFP_NET_RSS_IPV4_TCP:
 219                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
 220                 break;
 221         case NFP_NET_RSS_IPV6_TCP:
 222                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
 223                 break;
 224         case NFP_NET_RSS_IPV4_UDP:
 225                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
 226                 break;
 227         case NFP_NET_RSS_IPV6_UDP:
 228                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
 229                 break;
 230         default:
 231                 mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK;
 232         }
 233 }
 234
 235 /* nfp_net_rx_cksum - set mbuf checksum flags based on RX descriptor flags */
 236 static inline void
 237 nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
 238                  struct rte_mbuf *mb)
 239 {
 240         struct nfp_net_hw *hw = rxq->hw;
 241
 242         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RXCSUM))
 243                 return;
 244
 245         /* If IPv4 and IP checksum error, fail */
 246         if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
 247             !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK)))
 248                 mb->ol_flags |= PKT_RX_IP_CKSUM_BAD;
 249         else
 250                 mb->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
 251
 252         /* If neither UDP nor TCP return */
 253         if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
 254             !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM))
 255                 return;
 256
 257         if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK))
 258                 mb->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
 259         else
 260                 mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 261 }
 262
 263 /*
 264  * RX path design:
 265  *
 266  * There are some decisions to take:
 267  * 1) How to check DD RX descriptors bit
 268  * 2) How and when to allocate new mbufs
 269  *
 270  * Current implementation checks just one single DD bit each loop. As each
 271  * descriptor is 8 bytes, it is likely a good idea to check descriptors in
 272  * a single cache line instead. Tests with this change have not shown any
 273  * performance improvement but it requires further investigation. For example,
 274  * depending on which descriptor is next, the number of descriptors could be
 275  * less than 8 for just checking those in the same cache line. This implies
 276  * extra work which could be counterproductive by itself. Indeed, last firmware
 277  * changes are just doing this: writing several descriptors with the DD bit
 278  * for saving PCIe bandwidth and DMA operations from the NFP.
 279  *
 280  * Mbuf allocation is done when a new packet is received. Then the descriptor
 281  * is automatically linked with the new mbuf and the old one is given to the
 282  * user. The main drawback with this design is mbuf allocation is heavier than
 283  * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the
 284  * cache point of view it does not seem allocating the mbuf early on as we are
 285  * doing now have any benefit at all. Again, tests with this change have not
 286  * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing
 287  * so looking at the implications of this type of allocation should be studied
 288  * deeply
 289  */
 290
 291 uint16_t
 292 nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 293 {
 294         struct nfp_net_rxq *rxq;
 295         struct nfp_net_rx_desc *rxds;
 296         struct nfp_net_rx_buff *rxb;
 297         struct nfp_net_hw *hw;
 298         struct rte_mbuf *mb;
 299         struct rte_mbuf *new_mb;
 300         uint16_t nb_hold;
 301         uint64_t dma_addr;
 302         int avail;
 303
 304         rxq = rx_queue;
 305         if (unlikely(rxq == NULL)) {
 306                 /*
 307                  * DPDK just checks the queue is lower than max queues
 308                  * enabled. But the queue needs to be configured
 309                  */
 310                 RTE_LOG_DP(ERR, PMD, "RX Bad queue\n");
 311                 return -EINVAL;
 312         }
 313
 314         hw = rxq->hw;
 315         avail = 0;
 316         nb_hold = 0;
 317
 318         while (avail < nb_pkts) {
 319                 rxb = &rxq->rxbufs[rxq->rd_p];
 320                 if (unlikely(rxb == NULL)) {
 321                         RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n");
 322                         break;
 323                 }
 324
 325                 rxds = &rxq->rxds[rxq->rd_p];
 326                 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
 327                         break;
 328
 329                 /*
 330                  * Memory barrier to ensure that we won't do other
 331                  * reads before the DD bit.
 332                  */
 333                 rte_rmb();
 334
 335                 /*
 336                  * We got a packet. Let's alloc a new mbuf for refilling the
 337                  * free descriptor ring as soon as possible
 338                  */
 339                 new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
 340                 if (unlikely(new_mb == NULL)) {
 341                         RTE_LOG_DP(DEBUG, PMD,
 342                         "RX mbuf alloc failed port_id=%u queue_id=%u\n",
 343                                 rxq->port_id, (unsigned int)rxq->qidx);
 344                         nfp_net_mbuf_alloc_failed(rxq);
 345                         break;
 346                 }
 347
 348                 nb_hold++;
 349
 350                 /*
 351                  * Grab the mbuf and refill the descriptor with the
 352                  * previously allocated mbuf
 353                  */
 354                 mb = rxb->mbuf;
 355                 rxb->mbuf = new_mb;
 356
 357                 PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u",
 358                            rxds->rxd.data_len, rxq->mbuf_size);
 359
 360                 /* Size of this segment */
 361                 mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
 362                 /* Size of the whole packet. We just support 1 segment */
 363                 mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
 364
 365                 if (unlikely((mb->data_len + hw->rx_offset) >
 366                              rxq->mbuf_size)) {
 367                         /*
 368                          * This should not happen and the user has the
 369                          * responsibility of avoiding it. But we have
 370                          * to give some info about the error
 371                          */
 372                         RTE_LOG_DP(ERR, PMD,
 373                                 "mbuf overflow likely due to the RX offset.\n"
 374                                 "\t\tYour mbuf size should have extra space for"
 375                                 " RX offset=%u bytes.\n"
 376                                 "\t\tCurrently you just have %u bytes available"
 377                                 " but the received packet is %u bytes long",
 378                                 hw->rx_offset,
 379                                 rxq->mbuf_size - hw->rx_offset,
 380                                 mb->data_len);
 381                         return -EINVAL;
 382                 }
 383
 384                 /* Filling the received mbuf with packet info */
 385                 if (hw->rx_offset)
 386                         mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
 387                 else
 388                         mb->data_off = RTE_PKTMBUF_HEADROOM +
 389                                        NFP_DESC_META_LEN(rxds);
 390
 391                 /* No scatter mode supported */
 392                 mb->nb_segs = 1;
 393                 mb->next = NULL;
 394
 395                 mb->port = rxq->port_id;
 396
 397                 /* Checking the RSS flag */
 398                 nfp_net_set_hash(rxq, rxds, mb);
 399
 400                 /* Checking the checksum flag */
 401                 nfp_net_rx_cksum(rxq, rxds, mb);
 402
 403                 if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
 404                     (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
 405                         mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
 406                         mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
 407                 }
 408
 409                 /* Adding the mbuf to the mbuf array passed by the app */
 410                 rx_pkts[avail++] = mb;
 411
 412                 /* Now resetting and updating the descriptor */
 413                 rxds->vals[0] = 0;
 414                 rxds->vals[1] = 0;
 415                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb));
 416                 rxds->fld.dd = 0;
 417                 rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xff;
 418                 rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
 419
 420                 rxq->rd_p++;
 421                 if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/
 422                         rxq->rd_p = 0;
 423         }
 424
 425         if (nb_hold == 0)
 426                 return nb_hold;
 427
 428         PMD_RX_LOG(DEBUG, "RX  port_id=%u queue_id=%u, %d packets received",
 429                    rxq->port_id, (unsigned int)rxq->qidx, nb_hold);
 430
 431         nb_hold += rxq->nb_rx_hold;
 432
 433         /*
 434          * FL descriptors needs to be written before incrementing the
 435          * FL queue WR pointer
 436          */
 437         rte_wmb();
 438         if (nb_hold > rxq->rx_free_thresh) {
 439                 PMD_RX_LOG(DEBUG, "port=%u queue=%u nb_hold=%u avail=%u",
 440                            rxq->port_id, (unsigned int)rxq->qidx,
 441                            (unsigned int)nb_hold, (unsigned int)avail);
 442                 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
 443                 nb_hold = 0;
 444         }
 445         rxq->nb_rx_hold = nb_hold;
 446
 447         return avail;
 448 }
 449
 450 static void
 451 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
 452 {
 453         unsigned int i;
 454
 455         if (rxq->rxbufs == NULL)
 456                 return;
 457
 458         for (i = 0; i < rxq->rx_count; i++) {
 459                 if (rxq->rxbufs[i].mbuf) {
 460                         rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf);
 461                         rxq->rxbufs[i].mbuf = NULL;
 462                 }
 463         }
 464 }
 465
 466 void
 467 nfp_net_rx_queue_release(void *rx_queue)
 468 {
 469         struct nfp_net_rxq *rxq = rx_queue;
 470
 471         if (rxq) {
 472                 nfp_net_rx_queue_release_mbufs(rxq);
 473                 rte_free(rxq->rxbufs);
 474                 rte_free(rxq);
 475         }
 476 }
 477
 478 void
 479 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
 480 {
 481         nfp_net_rx_queue_release_mbufs(rxq);
 482         rxq->rd_p = 0;
 483         rxq->nb_rx_hold = 0;
 484 }
 485
 486 int
 487 nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
 488                        uint16_t queue_idx, uint16_t nb_desc,
 489                        unsigned int socket_id,
 490                        const struct rte_eth_rxconf *rx_conf,
 491                        struct rte_mempool *mp)
 492 {
 493         const struct rte_memzone *tz;
 494         struct nfp_net_rxq *rxq;
 495         struct nfp_net_hw *hw;
 496         uint32_t rx_desc_sz;
 497
 498         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 499
 500         PMD_INIT_FUNC_TRACE();
 501
 502         /* Validating number of descriptors */
 503         rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc);
 504         if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 ||
 505             nb_desc > NFP_NET_MAX_RX_DESC ||
 506             nb_desc < NFP_NET_MIN_RX_DESC) {
 507                 PMD_DRV_LOG(ERR, "Wrong nb_desc value");
 508                 return -EINVAL;
 509         }
 510
 511         /*
 512          * Free memory prior to re-allocation if needed. This is the case after
 513          * calling nfp_net_stop
 514          */
 515         if (dev->data->rx_queues[queue_idx]) {
 516                 nfp_net_rx_queue_release(dev->data->rx_queues[queue_idx]);
 517                 dev->data->rx_queues[queue_idx] = NULL;
 518         }
 519
 520         /* Allocating rx queue data structure */
 521         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
 522                                  RTE_CACHE_LINE_SIZE, socket_id);
 523         if (rxq == NULL)
 524                 return -ENOMEM;
 525
 526         /* Hw queues mapping based on firmware configuration */
 527         rxq->qidx = queue_idx;
 528         rxq->fl_qcidx = queue_idx * hw->stride_rx;
 529         rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1);
 530         rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);
 531         rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx);
 532
 533         /*
 534          * Tracking mbuf size for detecting a potential mbuf overflow due to
 535          * RX offset
 536          */
 537         rxq->mem_pool = mp;
 538         rxq->mbuf_size = rxq->mem_pool->elt_size;
 539         rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
 540         hw->flbufsz = rxq->mbuf_size;
 541
 542         rxq->rx_count = nb_desc;
 543         rxq->port_id = dev->data->port_id;
 544         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
 545         rxq->drop_en = rx_conf->rx_drop_en;
 546
 547         /*
 548          * Allocate RX ring hardware descriptors. A memzone large enough to
 549          * handle the maximum ring size is allocated in order to allow for
 550          * resizing in later calls to the queue setup function.
 551          */
 552         tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
 553                                    sizeof(struct nfp_net_rx_desc) *
 554                                    NFP_NET_MAX_RX_DESC, NFP_MEMZONE_ALIGN,
 555                                    socket_id);
 556
 557         if (tz == NULL) {
 558                 PMD_DRV_LOG(ERR, "Error allocating rx dma");
 559                 nfp_net_rx_queue_release(rxq);
 560                 return -ENOMEM;
 561         }
 562
 563         /* Saving physical and virtual addresses for the RX ring */
 564         rxq->dma = (uint64_t)tz->iova;
 565         rxq->rxds = (struct nfp_net_rx_desc *)tz->addr;
 566
 567         /* mbuf pointers array for referencing mbufs linked to RX descriptors */
 568         rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
 569                                          sizeof(*rxq->rxbufs) * nb_desc,
 570                                          RTE_CACHE_LINE_SIZE, socket_id);
 571         if (rxq->rxbufs == NULL) {
 572                 nfp_net_rx_queue_release(rxq);
 573                 return -ENOMEM;
 574         }
 575
 576         PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64,
 577                    rxq->rxbufs, rxq->rxds, (unsigned long)rxq->dma);
 578
 579         nfp_net_reset_rx_queue(rxq);
 580
 581         dev->data->rx_queues[queue_idx] = rxq;
 582         rxq->hw = hw;
 583
 584         /*
 585          * Telling the HW about the physical address of the RX ring and number
 586          * of descriptors in log2 format
 587          */
 588         nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma);
 589         nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc));
 590
 591         return 0;
 592 }
 593
 594 /*
 595  * nfp_net_tx_free_bufs - Check for descriptors with a complete
 596  * status
 597  * @txq: TX queue to work with
 598  * Returns number of descriptors freed
 599  */
 600 static int
 601 nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
 602 {
 603         uint32_t qcp_rd_p;
 604         int todo;
 605
 606         PMD_TX_LOG(DEBUG, "queue %u. Check for descriptor with a complete"
 607                    " status", txq->qidx);
 608
 609         /* Work out how many packets have been sent */
 610         qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
 611
 612         if (qcp_rd_p == txq->rd_p) {
 613                 PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending "
 614                            "packets (%u, %u)", txq->qidx,
 615                            qcp_rd_p, txq->rd_p);
 616                 return 0;
 617         }
 618
 619         if (qcp_rd_p > txq->rd_p)
 620                 todo = qcp_rd_p - txq->rd_p;
 621         else
 622                 todo = qcp_rd_p + txq->tx_count - txq->rd_p;
 623
 624         PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u",
 625                    qcp_rd_p, txq->rd_p, txq->rd_p);
 626
 627         if (todo == 0)
 628                 return todo;
 629
 630         txq->rd_p += todo;
 631         if (unlikely(txq->rd_p >= txq->tx_count))
 632                 txq->rd_p -= txq->tx_count;
 633
 634         return todo;
 635 }
 636
 637 static void
 638 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
 639 {
 640         unsigned int i;
 641
 642         if (txq->txbufs == NULL)
 643                 return;
 644
 645         for (i = 0; i < txq->tx_count; i++) {
 646                 if (txq->txbufs[i].mbuf) {
 647                         rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
 648                         txq->txbufs[i].mbuf = NULL;
 649                 }
 650         }
 651 }
 652
 653 void
 654 nfp_net_tx_queue_release(void *tx_queue)
 655 {
 656         struct nfp_net_txq *txq = tx_queue;
 657
 658         if (txq) {
 659                 nfp_net_tx_queue_release_mbufs(txq);
 660                 rte_free(txq->txbufs);
 661                 rte_free(txq);
 662         }
 663 }
 664
 665 void
 666 nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
 667 {
 668         nfp_net_tx_queue_release_mbufs(txq);
 669         txq->wr_p = 0;
 670         txq->rd_p = 0;
 671 }
 672
 673 int
 674 nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 675                        uint16_t nb_desc, unsigned int socket_id,
 676                        const struct rte_eth_txconf *tx_conf)
 677 {
 678         const struct rte_memzone *tz;
 679         struct nfp_net_txq *txq;
 680         uint16_t tx_free_thresh;
 681         struct nfp_net_hw *hw;
 682         uint32_t tx_desc_sz;
 683
 684         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 685
 686         PMD_INIT_FUNC_TRACE();
 687
 688         /* Validating number of descriptors */
 689         tx_desc_sz = nb_desc * sizeof(struct nfp_net_tx_desc);
 690         if (tx_desc_sz % NFP_ALIGN_RING_DESC != 0 ||
 691             nb_desc > NFP_NET_MAX_TX_DESC ||
 692             nb_desc < NFP_NET_MIN_TX_DESC) {
 693                 PMD_DRV_LOG(ERR, "Wrong nb_desc value");
 694                 return -EINVAL;
 695         }
 696
 697         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
 698                                     tx_conf->tx_free_thresh :
 699                                     DEFAULT_TX_FREE_THRESH);
 700
 701         if (tx_free_thresh > (nb_desc)) {
 702                 PMD_DRV_LOG(ERR,
 703                         "tx_free_thresh must be less than the number of TX "
 704                         "descriptors. (tx_free_thresh=%u port=%d "
 705                         "queue=%d)", (unsigned int)tx_free_thresh,
 706                         dev->data->port_id, (int)queue_idx);
 707                 return -(EINVAL);
 708         }
 709
 710         /*
 711          * Free memory prior to re-allocation if needed. This is the case after
 712          * calling nfp_net_stop
 713          */
 714         if (dev->data->tx_queues[queue_idx]) {
 715                 PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d",
 716                            queue_idx);
 717                 nfp_net_tx_queue_release(dev->data->tx_queues[queue_idx]);
 718                 dev->data->tx_queues[queue_idx] = NULL;
 719         }
 720
 721         /* Allocating tx queue data structure */
 722         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq),
 723                                  RTE_CACHE_LINE_SIZE, socket_id);
 724         if (txq == NULL) {
 725                 PMD_DRV_LOG(ERR, "Error allocating tx dma");
 726                 return -ENOMEM;
 727         }
 728
 729         /*
 730          * Allocate TX ring hardware descriptors. A memzone large enough to
 731          * handle the maximum ring size is allocated in order to allow for
 732          * resizing in later calls to the queue setup function.
 733          */
 734         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
 735                                    sizeof(struct nfp_net_tx_desc) *
 736                                    NFP_NET_MAX_TX_DESC, NFP_MEMZONE_ALIGN,
 737                                    socket_id);
 738         if (tz == NULL) {
 739                 PMD_DRV_LOG(ERR, "Error allocating tx dma");
 740                 nfp_net_tx_queue_release(txq);
 741                 return -ENOMEM;
 742         }
 743
 744         txq->tx_count = nb_desc;
 745         txq->tx_free_thresh = tx_free_thresh;
 746         txq->tx_pthresh = tx_conf->tx_thresh.pthresh;
 747         txq->tx_hthresh = tx_conf->tx_thresh.hthresh;
 748         txq->tx_wthresh = tx_conf->tx_thresh.wthresh;
 749
 750         /* queue mapping based on firmware configuration */
 751         txq->qidx = queue_idx;
 752         txq->tx_qcidx = queue_idx * hw->stride_tx;
 753         txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
 754
 755         txq->port_id = dev->data->port_id;
 756
 757         /* Saving physical and virtual addresses for the TX ring */
 758         txq->dma = (uint64_t)tz->iova;
 759         txq->txds = (struct nfp_net_tx_desc *)tz->addr;
 760
 761         /* mbuf pointers array for referencing mbufs linked to TX descriptors */
 762         txq->txbufs = rte_zmalloc_socket("txq->txbufs",
 763                                          sizeof(*txq->txbufs) * nb_desc,
 764                                          RTE_CACHE_LINE_SIZE, socket_id);
 765         if (txq->txbufs == NULL) {
 766                 nfp_net_tx_queue_release(txq);
 767                 return -ENOMEM;
 768         }
 769         PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64,
 770                    txq->txbufs, txq->txds, (unsigned long)txq->dma);
 771
 772         nfp_net_reset_tx_queue(txq);
 773
 774         dev->data->tx_queues[queue_idx] = txq;
 775         txq->hw = hw;
 776
 777         /*
 778          * Telling the HW about the physical address of the TX ring and number
 779          * of descriptors in log2 format
 780          */
 781         nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma);
 782         nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(nb_desc));
 783
 784         return 0;
 785 }
 786
 787 /* Leaving always free descriptors for avoiding wrapping confusion */
 788 static inline
 789 uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq)
 790 {
 791         if (txq->wr_p >= txq->rd_p)
 792                 return txq->tx_count - (txq->wr_p - txq->rd_p) - 8;
 793         else
 794                 return txq->rd_p - txq->wr_p - 8;
 795 }
 796
 797 /*
 798  * nfp_net_txq_full - Check if the TX queue free descriptors
 799  * is below tx_free_threshold
 800  *
 801  * @txq: TX queue to check
 802  *
 803  * This function uses the host copy* of read/write pointers
 804  */
 805 static inline
 806 uint32_t nfp_net_txq_full(struct nfp_net_txq *txq)
 807 {
 808         return (nfp_free_tx_desc(txq) < txq->tx_free_thresh);
 809 }
 810
 811 /* nfp_net_tx_tso - Set TX descriptor for TSO */
 812 static inline void
 813 nfp_net_tx_tso(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
 814                struct rte_mbuf *mb)
 815 {
 816         uint64_t ol_flags;
 817         struct nfp_net_hw *hw = txq->hw;
 818
 819         if (!(hw->cap & NFP_NET_CFG_CTRL_LSO_ANY))
 820                 goto clean_txd;
 821
 822         ol_flags = mb->ol_flags;
 823
 824         if (!(ol_flags & PKT_TX_TCP_SEG))
 825                 goto clean_txd;
 826
 827         txd->l3_offset = mb->l2_len;
 828         txd->l4_offset = mb->l2_len + mb->l3_len;
 829         txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len;
 830         txd->mss = rte_cpu_to_le_16(mb->tso_segsz);
 831         txd->flags = PCIE_DESC_TX_LSO;
 832         return;
 833
 834 clean_txd:
 835         txd->flags = 0;
 836         txd->l3_offset = 0;
 837         txd->l4_offset = 0;
 838         txd->lso_hdrlen = 0;
 839         txd->mss = 0;
 840 }
 841
 842 /* nfp_net_tx_cksum - Set TX CSUM offload flags in TX descriptor */
 843 static inline void
 844 nfp_net_tx_cksum(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
 845                  struct rte_mbuf *mb)
 846 {
 847         uint64_t ol_flags;
 848         struct nfp_net_hw *hw = txq->hw;
 849
 850         if (!(hw->cap & NFP_NET_CFG_CTRL_TXCSUM))
 851                 return;
 852
 853         ol_flags = mb->ol_flags;
 854
 855         /* IPv6 does not need checksum */
 856         if (ol_flags & PKT_TX_IP_CKSUM)
 857                 txd->flags |= PCIE_DESC_TX_IP4_CSUM;
 858
 859         switch (ol_flags & PKT_TX_L4_MASK) {
 860         case PKT_TX_UDP_CKSUM:
 861                 txd->flags |= PCIE_DESC_TX_UDP_CSUM;
 862                 break;
 863         case PKT_TX_TCP_CKSUM:
 864                 txd->flags |= PCIE_DESC_TX_TCP_CSUM;
 865                 break;
 866         }
 867
 868         if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
 869                 txd->flags |= PCIE_DESC_TX_CSUM;
 870 }
 871
 872 uint16_t
 873 nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 874 {
 875         struct nfp_net_txq *txq;
 876         struct nfp_net_hw *hw;
 877         struct nfp_net_tx_desc *txds, txd;
 878         struct rte_mbuf *pkt;
 879         uint64_t dma_addr;
 880         int pkt_size, dma_size;
 881         uint16_t free_descs, issued_descs;
 882         struct rte_mbuf **lmbuf;
 883         int i;
 884
 885         txq = tx_queue;
 886         hw = txq->hw;
 887         txds = &txq->txds[txq->wr_p];
 888
 889         PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets",
 890                    txq->qidx, txq->wr_p, nb_pkts);
 891
 892         if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
 893                 nfp_net_tx_free_bufs(txq);
 894
 895         free_descs = (uint16_t)nfp_free_tx_desc(txq);
 896         if (unlikely(free_descs == 0))
 897                 return 0;
 898
 899         pkt = *tx_pkts;
 900
 901         i = 0;
 902         issued_descs = 0;
 903         PMD_TX_LOG(DEBUG, "queue: %u. Sending %u packets",
 904                    txq->qidx, nb_pkts);
 905         /* Sending packets */
 906         while ((i < nb_pkts) && free_descs) {
 907                 /* Grabbing the mbuf linked to the current descriptor */
 908                 lmbuf = &txq->txbufs[txq->wr_p].mbuf;
 909                 /* Warming the cache for releasing the mbuf later on */
 910                 RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);
 911
 912                 pkt = *(tx_pkts + i);
 913
 914                 if (unlikely(pkt->nb_segs > 1 &&
 915                              !(hw->cap & NFP_NET_CFG_CTRL_GATHER))) {
 916                         PMD_INIT_LOG(INFO, "NFP_NET_CFG_CTRL_GATHER not set");
 917                         rte_panic("Multisegment packet unsupported\n");
 918                 }
 919
 920                 /* Checking if we have enough descriptors */
 921                 if (unlikely(pkt->nb_segs > free_descs))
 922                         goto xmit_end;
 923
 924                 /*
 925                  * Checksum and VLAN flags just in the first descriptor for a
 926                  * multisegment packet, but TSO info needs to be in all of them.
 927                  */
 928                 txd.data_len = pkt->pkt_len;
 929                 nfp_net_tx_tso(txq, &txd, pkt);
 930                 nfp_net_tx_cksum(txq, &txd, pkt);
 931
 932                 if ((pkt->ol_flags & PKT_TX_VLAN_PKT) &&
 933                     (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)) {
 934                         txd.flags |= PCIE_DESC_TX_VLAN;
 935                         txd.vlan = pkt->vlan_tci;
 936                 }
 937
 938                 /*
 939                  * mbuf data_len is the data in one segment and pkt_len data
 940                  * in the whole packet. When the packet is just one segment,
 941                  * then data_len = pkt_len
 942                  */
 943                 pkt_size = pkt->pkt_len;
 944
 945                 while (pkt) {
 946                         /* Copying TSO, VLAN and cksum info */
 947                         *txds = txd;
 948
 949                         /* Releasing mbuf used by this descriptor previously*/
 950                         if (*lmbuf)
 951                                 rte_pktmbuf_free_seg(*lmbuf);
 952
 953                         /*
 954                          * Linking mbuf with descriptor for being released
 955                          * next time descriptor is used
 956                          */
 957                         *lmbuf = pkt;
 958
 959                         dma_size = pkt->data_len;
 960                         dma_addr = rte_mbuf_data_iova(pkt);
 961                         PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:"
 962                                    "%" PRIx64 "", dma_addr);
 963
 964                         /* Filling descriptors fields */
 965                         txds->dma_len = dma_size;
 966                         txds->data_len = txd.data_len;
 967                         txds->dma_addr_hi = (dma_addr >> 32) & 0xff;
 968                         txds->dma_addr_lo = (dma_addr & 0xffffffff);
 969                         ASSERT(free_descs > 0);
 970                         free_descs--;
 971
 972                         txq->wr_p++;
 973                         if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/
 974                                 txq->wr_p = 0;
 975
 976                         pkt_size -= dma_size;
 977
 978                         /*
 979                          * Making the EOP, packets with just one segment
 980                          * the priority
 981                          */
 982                         if (likely(!pkt_size))
 983                                 txds->offset_eop = PCIE_DESC_TX_EOP;
 984                         else
 985                                 txds->offset_eop = 0;
 986
 987                         pkt = pkt->next;
 988                         /* Referencing next free TX descriptor */
 989                         txds = &txq->txds[txq->wr_p];
 990                         lmbuf = &txq->txbufs[txq->wr_p].mbuf;
 991                         issued_descs++;
 992                 }
 993                 i++;
 994         }
 995
 996 xmit_end:
 997         /* Increment write pointers. Force memory write before we let HW know */
 998         rte_wmb();
 999         nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs);
1000
1001         return i;
1002 }