/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018-2020 NXP
 */

#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include "rte_ethdev.h"
#include "rte_malloc.h"
#include "rte_memzone.h"

#include "base/enetc_hw.h"
#include "enetc.h"
#include "enetc_logs.h"
#define ENETC_CACHE_LINE_RXBDS	(RTE_CACHE_LINE_SIZE / \
				 sizeof(union enetc_rx_bd))
#define ENETC_RXBD_BUNDLE 16 /* Number of buffers to allocate at once */
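
/*
 * Reclaim Tx descriptors already processed by hardware: walk the software BD
 * ring from next_to_clean up to the hardware consumer index and return the
 * attached mbufs in bulk.
 */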
static int
enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
{
	int tx_frm_cnt = 0;
	struct enetc_swbd *tx_swbd, *tx_swbd_base;
	int i, hwci, bd_count;
	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];

	/* we don't need barriers here, we just want a relatively current value
	 * from HW.
	 */
	hwci = (int)(rte_read32_relaxed(tx_ring->tcisr) &
		     ENETC_TBCISR_IDX_MASK);

	tx_swbd_base = tx_ring->q_swbd;
	bd_count = tx_ring->bd_count;
	i = tx_ring->next_to_clean;
	tx_swbd = &tx_swbd_base[i];

	/* we're only reading the CI index once here, which means HW may update
	 * it while we're doing clean-up. We could read the register in a loop
	 * but for now we assume it's OK to leave a few Tx frames for the next
	 * call. The issue with reading the register in a loop is that we would
	 * stall here trying to catch up with HW, which keeps sending traffic
	 * as long as it has traffic to send, so in effect we could end up
	 * waiting here for the Tx ring to be drained by HW instead of doing
	 * Rx work in the meantime.
	 */
	while (i != hwci) {
		/* Calling rte_pktmbuf_free() per mbuf wastes a lot of cycles;
		 * stage the mbufs in a local array and free them in bulk.
		 */
		if (tx_frm_cnt == ENETC_RXBD_BUNDLE) {
			rte_pktmbuf_free_bulk(m, tx_frm_cnt);
			tx_frm_cnt = 0;
		}

		m[tx_frm_cnt] = tx_swbd->buffer_addr;
		tx_swbd->buffer_addr = NULL;

		i++;
		tx_swbd++;
		if (unlikely(i == bd_count)) {
			i = 0;
			tx_swbd = tx_swbd_base;
		}

		tx_frm_cnt++;
	}

	if (tx_frm_cnt)
		rte_pktmbuf_free_bulk(m, tx_frm_cnt);

	tx_ring->next_to_clean = i;

	return 0;
}
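
/*
 * Transmit burst entry point: consume up to nb_pkts mbufs, fill one Tx BD per
 * frame and advance the Tx producer index register once for the whole batch.
 */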
uint16_t
enetc_xmit_pkts(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct enetc_swbd *tx_swbd;
	int i, start, bds_to_use;
	struct enetc_tx_bd *txbd;
	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;

	i = tx_ring->next_to_use;

	bds_to_use = enetc_bd_unused(tx_ring);
	if (bds_to_use < nb_pkts)
		nb_pkts = bds_to_use;
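
	/* One BD per frame: record the mbuf in the software ring for later
	 * clean-up, then fill the hardware BD with the frame length, the
	 * buffer IOVA and the F flag.
	 */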
	start = 0;
	while (nb_pkts--) {
		tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
		txbd = ENETC_TXBD(*tx_ring, i);
		tx_swbd = &tx_ring->q_swbd[i];
		txbd->frm_len = tx_pkts[start]->pkt_len;
		txbd->buf_len = txbd->frm_len;
		txbd->flags = rte_cpu_to_le_16(ENETC_TXBD_FLAGS_F);
		txbd->addr = (uint64_t)(uintptr_t)
		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
				 tx_swbd->buffer_addr->data_off);
		i++;
		start++;
		if (unlikely(i == tx_ring->bd_count))
			i = 0;
	}

	/* we're only cleaning up the Tx ring here, on the assumption that
	 * software is slower than hardware and hardware completed sending
	 * older frames out by now.
	 * We're also cleaning up the ring before kicking off Tx for the new
	 * batch to minimize chances of contention on the Tx ring.
	 */
	enetc_clean_tx_ring(tx_ring);

	tx_ring->next_to_use = i;
	enetc_wr_reg(tx_ring->tcir, i);

	return start;
}
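
/*
 * Re-arm up to buff_cnt Rx BDs with fresh mbufs from the queue's mempool and
 * publish the new producer index to hardware.
 */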
int
enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
{
	struct enetc_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;
	int i, j, k = ENETC_RXBD_BUNDLE;
	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
	struct rte_mempool *mb_pool;

	i = rx_ring->next_to_use;
	mb_pool = rx_ring->mb_pool;
	rx_swbd = &rx_ring->q_swbd[i];
	rxbd = ENETC_RXBD(*rx_ring, i);
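
	/* Attach one mbuf per BD; allocations are done in bundles so their
	 * cost is amortized over ENETC_RXBD_BUNDLE descriptors.
	 */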
	for (j = 0; j < buff_cnt; j++) {
		/* bulk alloc for the next up to ENETC_RXBD_BUNDLE BDs */
		if (k == ENETC_RXBD_BUNDLE) {
			int m_cnt = RTE_MIN(buff_cnt - j, ENETC_RXBD_BUNDLE);

			if (rte_pktmbuf_alloc_bulk(mb_pool, m, m_cnt))
				return -1;

			k = 0;
		}

		rx_swbd->buffer_addr = m[k];
		rxbd->w.addr = (uint64_t)(uintptr_t)
			       rx_swbd->buffer_addr->buf_iova +
			       rx_swbd->buffer_addr->data_off;
		/* clear 'R' as well */
		rxbd->r.lstatus = 0;

		k++;
		rx_swbd++;
		rxbd++;
		i++;
		if (unlikely(i == rx_ring->bd_count)) {
			i = 0;
			rxbd = ENETC_RXBD(*rx_ring, 0);
			rx_swbd = &rx_ring->q_swbd[i];
		}
	}

	if (likely(j)) {
		rx_ring->next_to_alloc = i;
		rx_ring->next_to_use = i;
		enetc_wr_reg(rx_ring->rcir, i);
	}

	return j;
}
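
/*
 * Slow-path translation of the hardware parse summary, used when the fast
 * path in enetc_dev_rx_parse() does not recognize the value (typically the
 * parse-error cases): set packet_type and the checksum ol_flags accordingly.
 */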
static inline void enetc_slow_parsing(struct rte_mbuf *m,
				      uint64_t parse_results)
{
	m->ol_flags &= ~(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);

	switch (parse_results) {
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4;
		m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6;
		m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_TCP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_TCP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_UDP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_UDP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_SCTP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_SCTP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_ICMP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_ICMP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	/* More switch cases can be added */
	default:
		m->packet_type = RTE_PTYPE_UNKNOWN;
		m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN |
			       PKT_RX_L4_CKSUM_UNKNOWN;
	}
}
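
/*
 * Translate the hardware parse summary into an rte_mbuf packet_type and
 * checksum ol_flags; anything not handled here is passed on to
 * enetc_slow_parsing().
 */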
static inline void __rte_hot
enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
{
	ENETC_PMD_DP_DEBUG("parse summary = 0x%x ", parse_results);
	m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD;

	switch (parse_results) {
	case ENETC_PKT_TYPE_ETHER:
		m->packet_type = RTE_PTYPE_L2_ETHER;
		return;
	case ENETC_PKT_TYPE_IPV4:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4;
		return;
	case ENETC_PKT_TYPE_IPV6:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6;
		return;
	case ENETC_PKT_TYPE_IPV4_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_TCP;
		return;
	case ENETC_PKT_TYPE_IPV6_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_TCP;
		return;
	case ENETC_PKT_TYPE_IPV4_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_UDP;
		return;
	case ENETC_PKT_TYPE_IPV6_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_UDP;
		return;
	case ENETC_PKT_TYPE_IPV4_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_SCTP;
		return;
	case ENETC_PKT_TYPE_IPV6_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_SCTP;
		return;
	case ENETC_PKT_TYPE_IPV4_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_ICMP;
		return;
	case ENETC_PKT_TYPE_IPV6_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_ICMP;
		return;
	/* More switch cases can be added */
	default:
		enetc_slow_parsing(m, parse_results);
	}
}
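
/*
 * Harvest up to work_limit completed Rx BDs into rx_pkts, then hand the
 * freed descriptors back to hardware via enetc_refill_rx_ring().
 */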
static int
enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
		    struct rte_mbuf **rx_pkts,
		    int work_limit)
{
	int rx_frm_cnt = 0;
	int cleaned_cnt, i, bd_count;
	struct enetc_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;

	/* next descriptor to process */
	i = rx_ring->next_to_clean;
	rxbd = ENETC_RXBD(*rx_ring, i);

	bd_count = rx_ring->bd_count;

	/* LS1028A does not have platform cache, so any software access
	 * following a hardware write will go directly to DDR. The latency of
	 * such a read is in excess of 100 core cycles, so try to prefetch
	 * further in advance to mitigate this.
	 * How much is worth prefetching really depends on traffic conditions.
	 * With congested Rx this could go up to 4 cache lines or so. But if
	 * software keeps up with hardware and follows behind Rx PI by a cache
	 * line or less then it's harmful in terms of performance to cache
	 * more. We would only be prefetching BDs that have yet to be written
	 * by ENETC, which will have to be evicted again anyway.
	 */
	rte_prefetch0(ENETC_RXBD(*rx_ring,
				 (i + ENETC_CACHE_LINE_RXBDS) % bd_count));
	rte_prefetch0(ENETC_RXBD(*rx_ring,
				 (i + ENETC_CACHE_LINE_RXBDS * 2) % bd_count));

	cleaned_cnt = enetc_bd_unused(rx_ring);
	rx_swbd = &rx_ring->q_swbd[i];
	while (likely(rx_frm_cnt < work_limit)) {
		uint32_t bd_status;

		bd_status = rte_le_to_cpu_32(rxbd->r.lstatus);
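		/* The refill path cleared lstatus; hardware writes it back
		 * when the BD is done, so a zero value means this descriptor
		 * has not completed yet and we stop here.
		 */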
		if (!bd_status)
			break;

		rx_swbd->buffer_addr->pkt_len = rxbd->r.buf_len -
						rx_ring->crc_len;
		rx_swbd->buffer_addr->data_len = rxbd->r.buf_len -
						 rx_ring->crc_len;
		rx_swbd->buffer_addr->hash.rss = rxbd->r.rss_hash;
		rx_swbd->buffer_addr->ol_flags = 0;
		enetc_dev_rx_parse(rx_swbd->buffer_addr,
				   rxbd->r.parse_summary);
		rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
		cleaned_cnt++;
		rx_swbd++;
		i++;
		if (unlikely(i == rx_ring->bd_count)) {
			i = 0;
			rx_swbd = &rx_ring->q_swbd[i];
		}
		rxbd = ENETC_RXBD(*rx_ring, i);
		rte_prefetch0(ENETC_RXBD(*rx_ring,
					 (i + ENETC_CACHE_LINE_RXBDS) %
					 bd_count));
		rte_prefetch0(ENETC_RXBD(*rx_ring,
					 (i + ENETC_CACHE_LINE_RXBDS * 2) %
					 bd_count));

		rx_frm_cnt++;
	}

	rx_ring->next_to_clean = i;
	enetc_refill_rx_ring(rx_ring, cleaned_cnt);

	return rx_frm_cnt;
}
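
/*
 * Receive burst entry point (rte_eth_rx_burst callback); thin wrapper around
 * enetc_clean_rx_ring().
 */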
uint16_t
enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;

	return enetc_clean_rx_ring(rx_ring, rx_pkts, nb_pkts);
}