X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Faf_xdp%2Frte_eth_af_xdp.c;h=ed784dff198274c260d68d1e8b51af217712dca0;hb=43e34a229d3e575eb5ed075091afef0e5bf60a77;hp=dd5c88cac47eb3a190b6d7cff2c0c3e9532b9976;hpb=e9ff8bb719434c5ee6ccb6f3acbdac246f965875;p=dpdk.git

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index dd5c88cac4..ed784dff19 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
+#include <poll.h>
 #include <netinet/in.h>
 #include <net/if.h>
 #include <sys/socket.h>
@@ -12,6 +13,8 @@
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
 #include <linux/if_link.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
 #include "af_xdp_deps.h"
 #include <bpf/xsk.h>
 
@@ -23,7 +26,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -53,16 +55,22 @@ static int af_xdp_logtype;
 	rte_log(RTE_LOG_ ## level, af_xdp_logtype,	\
 		"%s(): " fmt, __func__, ##args)
 
-#define ETH_AF_XDP_FRAME_SIZE		XSK_UMEM__DEFAULT_FRAME_SIZE
+#define ETH_AF_XDP_FRAME_SIZE		2048
 #define ETH_AF_XDP_NUM_BUFFERS		4096
+#ifdef XDP_UMEM_UNALIGNED_CHUNK_FLAG
+#define ETH_AF_XDP_MBUF_OVERHEAD	128 /* sizeof(struct rte_mbuf) */
+#define ETH_AF_XDP_DATA_HEADROOM \
+	(ETH_AF_XDP_MBUF_OVERHEAD + RTE_PKTMBUF_HEADROOM)
+#else
 #define ETH_AF_XDP_DATA_HEADROOM	0
+#endif
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
-#define ETH_AF_XDP_DFLT_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
 
 #define ETH_AF_XDP_RX_BATCH_SIZE	32
 #define ETH_AF_XDP_TX_BATCH_SIZE	32
 
-#define ETH_AF_XDP_MAX_QUEUE_PAIRS	16
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
@@ -70,7 +78,8 @@ struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
-	int pmd_zc;
+	struct rte_mempool *mb_pool;
+	void *buffer;
 };
 
 struct rx_stats {
@@ -88,44 +97,48 @@ struct pkt_rx_queue {
 	struct rx_stats stats;
 
 	struct pkt_tx_queue *pair;
-	uint16_t queue_idx;
+	struct pollfd fds[1];
+	int xsk_queue_idx;
 };
 
 struct tx_stats {
 	uint64_t tx_pkts;
-	uint64_t err_pkts;
 	uint64_t tx_bytes;
+	uint64_t tx_dropped;
 };
 
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
 
 	struct tx_stats stats;
 
 	struct pkt_rx_queue *pair;
-	uint16_t queue_idx;
+	int xsk_queue_idx;
 };
 
 struct pmd_internals {
 	int if_index;
	char if_name[IFNAMSIZ];
-	uint16_t queue_idx;
-	int pmd_zc;
+	int start_queue_idx;
+	int queue_cnt;
+	int max_queue_cnt;
+	int combined_queue_cnt;
+
 	struct rte_ether_addr eth_addr;
-	struct rte_mempool *mb_pool_share;
 
-	struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
-	struct pkt_tx_queue tx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
+	struct pkt_rx_queue *rx_queues;
+	struct pkt_tx_queue *tx_queues;
 };
 
 #define ETH_AF_XDP_IFACE_ARG			"iface"
-#define ETH_AF_XDP_QUEUE_IDX_ARG		"queue"
-#define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
+#define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
+#define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
-	ETH_AF_XDP_QUEUE_IDX_ARG,
-	ETH_AF_XDP_PMD_ZC_ARG,
+	ETH_AF_XDP_START_QUEUE_ARG,
+	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	NULL
 };
 
@@ -136,8 +149,39 @@ static const struct rte_eth_link pmd_link = {
 	.link_autoneg = ETH_LINK_AUTONEG
 };
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 static inline int
-reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
+reserve_fill_queue_zc(struct xsk_umem_info *umem, uint16_t reserve_size,
+		      struct rte_mbuf **bufs)
+{
+	struct xsk_ring_prod *fq = &umem->fq;
+	uint32_t idx;
+	uint16_t i;
+
+	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
+		for (i = 0; i < reserve_size; i++)
+			rte_pktmbuf_free(bufs[i]);
+		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
+		return -1;
+	}
+
+	for (i = 0; i < reserve_size; i++) {
+		__u64 *fq_addr;
+		uint64_t addr;
+
+		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
+		addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer;
+		*fq_addr = addr;
+	}
+
+	xsk_ring_prod__submit(fq, reserve_size);
+
+	return 0;
+}
+#else
+static inline int
+reserve_fill_queue_cp(struct xsk_umem_info *umem, uint16_t reserve_size,
+		      struct rte_mbuf **bufs __rte_unused)
 {
 	struct xsk_ring_prod *fq = &umem->fq;
 	void *addrs[reserve_size];
@@ -168,67 +212,129 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 
 	return 0;
 }
+#endif
 
-static void
-umem_buf_release_to_fq(void *addr, void *opaque)
+static inline int
+reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size,
+		   struct rte_mbuf **bufs)
 {
-	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
-	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
-
-	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	return reserve_fill_queue_zc(umem, reserve_size, bufs);
+#else
+	return reserve_fill_queue_cp(umem, reserve_size, bufs);
+#endif
 }
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 static uint16_t
-eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_rx_queue *rxq = queue;
 	struct xsk_ring_cons *rx = &rxq->rx;
 	struct xsk_umem_info *umem = rxq->umem;
-	struct xsk_ring_prod *fq = &umem->fq;
 	uint32_t idx_rx = 0;
-	uint32_t free_thresh = fq->size >> 1;
-	int pmd_zc = umem->pmd_zc;
-	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
-	unsigned long dropped = 0;
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
+	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
-	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
+	/* allocate bufs for fill queue replenishment after rx */
+	if (rte_pktmbuf_alloc_bulk(umem->mb_pool, fq_bufs, nb_pkts)) {
+		AF_XDP_LOG(DEBUG,
+			"Failed to get enough buffers for fq.\n");
+		return -1;
+	}
+
+	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
+
+	if (rcvd == 0) {
+#if defined(XDP_USE_NEED_WAKEUP)
+		if (xsk_ring_prod__needs_wakeup(&umem->fq))
+			(void)poll(rxq->fds, 1, 1000);
+#endif
+
+		goto out;
+	}
+
+	for (i = 0; i < rcvd; i++) {
+		const struct xdp_desc *desc;
+		uint64_t addr;
+		uint32_t len;
+		uint64_t offset;
+
+		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
+		addr = desc->addr;
+		len = desc->len;
+
+		offset = xsk_umem__extract_offset(addr);
+		addr = xsk_umem__extract_addr(addr);
+
+		bufs[i] = (struct rte_mbuf *)
+				xsk_umem__get_data(umem->buffer, addr);
+		bufs[i]->data_off = offset - sizeof(struct rte_mbuf);
+
+		rte_pktmbuf_pkt_len(bufs[i]) = len;
+		rte_pktmbuf_data_len(bufs[i]) = len;
+		rx_bytes += len;
+	}
+
+	xsk_ring_cons__release(rx, rcvd);
+
+	(void)reserve_fill_queue(umem, rcvd, fq_bufs);
+
+	/* statistics */
+	rxq->stats.rx_pkts += rcvd;
+	rxq->stats.rx_bytes += rx_bytes;
+
+out:
+	if (rcvd != nb_pkts)
+		rte_mempool_put_bulk(umem->mb_pool, (void **)&fq_bufs[rcvd],
+				     nb_pkts - rcvd);
+
+	return rcvd;
+}
+#else
+static uint16_t
+af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct pkt_rx_queue *rxq = queue;
+	struct xsk_ring_cons *rx = &rxq->rx;
+	struct xsk_umem_info *umem = rxq->umem;
+	struct xsk_ring_prod *fq = &umem->fq;
+	uint32_t idx_rx = 0;
+	unsigned long rx_bytes = 0;
+	int rcvd, i;
+	uint32_t free_thresh = fq->size >> 1;
+	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
 	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
 		return 0;
 
 	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
-	if (rcvd == 0)
+	if (rcvd == 0) {
+#if defined(XDP_USE_NEED_WAKEUP)
+		if (xsk_ring_prod__needs_wakeup(fq))
+			(void)poll(rxq->fds, 1, 1000);
+#endif
+
 		goto out;
+	}
 
 	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
-		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE);
+		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE, NULL);
 
 	for (i = 0; i < rcvd; i++) {
 		const struct xdp_desc *desc;
 		uint64_t addr;
 		uint32_t len;
 		void *pkt;
-		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
-		struct rte_mbuf_ext_shared_info *shinfo;
 
 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
 		addr = desc->addr;
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
 
-		if (pmd_zc) {
-			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
-					&buf_len, umem_buf_release_to_fq, umem);
-
-			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
-						  shinfo);
-		} else {
-			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
-					pkt, len);
-			rte_ring_enqueue(umem->buf_ring, (void *)addr);
-		}
+		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
+		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
 		rx_bytes += len;
@@ -238,7 +344,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	xsk_ring_cons__release(rx, rcvd);
 
 	/* statistics */
-	rxq->stats.rx_pkts += (rcvd - dropped);
+	rxq->stats.rx_pkts += rcvd;
 	rxq->stats.rx_bytes += rx_bytes;
 
 out:
@@ -248,6 +354,19 @@ out:
 
 	return rcvd;
 }
+#endif
+
+static uint16_t
+eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
+
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	return af_xdp_rx_zc(queue, bufs, nb_pkts);
+#else
+	return af_xdp_rx_cp(queue, bufs, nb_pkts);
+#endif
+}
 
 static void
 pull_umem_cq(struct xsk_umem_info *umem, int size)
@@ -261,7 +380,13 @@ pull_umem_cq(struct xsk_umem_info *umem, int size)
 	for (i = 0; i < n; i++) {
 		uint64_t addr;
 		addr = *xsk_ring_cons__comp_addr(cq, idx_cq++);
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		addr = xsk_umem__extract_addr(addr);
+		rte_pktmbuf_free((struct rte_mbuf *)
+					xsk_umem__get_data(umem->buffer, addr));
+#else
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
+#endif
 	}
 
 	xsk_ring_cons__release(cq, n);
@@ -270,36 +395,108 @@
 static void
 kick_tx(struct pkt_tx_queue *txq)
 {
-	struct xsk_umem_info *umem = txq->pair->umem;
+	struct xsk_umem_info *umem = txq->umem;
 
-	while (send(xsk_socket__fd(txq->pair->xsk), NULL,
-		    0, MSG_DONTWAIT) < 0) {
-		/* some thing unexpected */
-		if (errno != EBUSY && errno != EAGAIN && errno != EINTR)
-			break;
-
-		/* pull from completion queue to leave more space */
-		if (errno == EAGAIN)
-			pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
-	}
+#if defined(XDP_USE_NEED_WAKEUP)
+	if (xsk_ring_prod__needs_wakeup(&txq->tx))
+#endif
+		while (send(xsk_socket__fd(txq->pair->xsk), NULL,
+			    0, MSG_DONTWAIT) < 0) {
+			/* some thing unexpected */
+			if (errno != EBUSY && errno != EAGAIN && errno != EINTR)
+				break;
+
+			/* pull from completion queue to leave more space */
+			if (errno == EAGAIN)
+				pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
+		}
+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
 	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
+#endif
 }
 
-static inline bool
-in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+static uint16_t
+af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
-	uint64_t mz_base_addr = umem->mz->addr_64;
+	struct pkt_tx_queue *txq = queue;
+	struct xsk_umem_info *umem = txq->umem;
+	struct rte_mbuf *mbuf;
+	unsigned long tx_bytes = 0;
+	int i;
+	uint32_t idx_tx;
+	uint16_t count = 0;
+	struct xdp_desc *desc;
+	uint64_t addr, offset;
 
-	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
-}
+	pull_umem_cq(umem, nb_pkts);
+
+	for (i = 0; i < nb_pkts; i++) {
+		mbuf = bufs[i];
+
+		if (mbuf->pool == umem->mb_pool) {
+			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+				kick_tx(txq);
+				goto out;
+			}
+			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
+			desc->len = mbuf->pkt_len;
+			addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
+			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
+					(uint64_t)mbuf;
+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+			desc->addr = addr | offset;
+			count++;
+		} else {
+			struct rte_mbuf *local_mbuf =
+					rte_pktmbuf_alloc(umem->mb_pool);
+			void *pkt;
+
+			if (local_mbuf == NULL)
+				goto out;
+
+			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+				rte_pktmbuf_free(local_mbuf);
+				kick_tx(txq);
+				goto out;
+			}
+
+			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
+			desc->len = mbuf->pkt_len;
+
+			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer;
+			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
+					(uint64_t)local_mbuf;
+			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+			desc->addr = addr | offset;
+			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+					desc->len);
+			rte_pktmbuf_free(mbuf);
+			count++;
+		}
+
+		tx_bytes += mbuf->pkt_len;
+	}
+
+	kick_tx(txq);
+
+out:
+	xsk_ring_prod__submit(&txq->tx, count);
+
+	txq->stats.tx_pkts += count;
+	txq->stats.tx_bytes += tx_bytes;
+	txq->stats.tx_dropped += nb_pkts - count;
+
+	return count;
+}
+#else
 static uint16_t
-eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
-	struct xsk_umem_info *umem = txq->pair->umem;
+	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
@@ -328,24 +525,12 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf = bufs[i];
 		desc->len = mbuf->pkt_len;
 
-		/*
-		 * We need to make sure the external mbuf address is within
-		 * current port's umem memzone range
-		 */
-		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
-				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
-			desc->addr = (uint64_t)mbuf->buf_addr -
-					umem->mz->addr_64;
-			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
-					(uint64_t)addrs[i]);
-		} else {
-			desc->addr = (uint64_t)addrs[i];
-			pkt = xsk_umem__get_data(umem->mz->addr,
-					desc->addr);
-			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					desc->len);
-		}
+		desc->addr = (uint64_t)addrs[i];
+		pkt = xsk_umem__get_data(umem->mz->addr,
+				desc->addr);
+		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
 		tx_bytes += mbuf->pkt_len;
+		rte_pktmbuf_free(mbuf);
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
@@ -355,11 +540,19 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	txq->stats.tx_pkts += nb_pkts;
 	txq->stats.tx_bytes += tx_bytes;
 
-	for (i = 0; i < nb_pkts; i++)
-		rte_pktmbuf_free(bufs[i]);
-
 	return nb_pkts;
 }
+#endif
+
+static uint16_t
+eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	return af_xdp_tx_zc(queue, bufs, nb_pkts);
+#else
+	return af_xdp_tx_cp(queue, bufs, nb_pkts);
+#endif
+}
 
 static int
 eth_dev_start(struct rte_eth_dev *dev)
@@ -386,7 +579,7 @@ eth_dev_configure(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static void
+static int
 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
@@ -394,8 +587,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->if_index = internals->if_index;
 	dev_info->max_mac_addrs = 1;
 	dev_info->max_rx_pktlen = ETH_FRAME_LEN;
-	dev_info->max_rx_queues = 1;
-	dev_info->max_tx_queues = 1;
+	dev_info->max_rx_queues = internals->queue_cnt;
+	dev_info->max_tx_queues = internals->queue_cnt;
 
 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
 	dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
@@ -404,6 +597,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->default_txportconf.nb_queues = 1;
 	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
 	dev_info->default_txportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
+
+	return 0;
 }
 
 static int
@@ -412,21 +607,24 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	struct pmd_internals *internals = dev->data->dev_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
+	struct pkt_tx_queue *txq;
 	socklen_t optlen;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
 		rxq = &internals->rx_queues[i];
-		stats->q_ipackets[i] = internals->rx_queues[i].stats.rx_pkts;
-		stats->q_ibytes[i] = internals->rx_queues[i].stats.rx_bytes;
+		txq = rxq->pair;
+		stats->q_ipackets[i] = rxq->stats.rx_pkts;
+		stats->q_ibytes[i] = rxq->stats.rx_bytes;
 
-		stats->q_opackets[i] = internals->tx_queues[i].stats.tx_pkts;
-		stats->q_obytes[i] = internals->tx_queues[i].stats.tx_bytes;
+		stats->q_opackets[i] = txq->stats.tx_pkts;
+		stats->q_obytes[i] = txq->stats.tx_bytes;
 
 		stats->ipackets += stats->q_ipackets[i];
 		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += internals->rx_queues[i].stats.rx_dropped;
+		stats->imissed += rxq->stats.rx_dropped;
+		stats->oerrors += txq->stats.tx_dropped;
 		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
 				XDP_STATISTICS, &xdp_stats, &optlen);
 		if (ret != 0) {
@@ -436,25 +634,26 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->imissed += xdp_stats.rx_dropped;
 
 		stats->opackets += stats->q_opackets[i];
-		stats->oerrors += internals->tx_queues[i].stats.err_pkts;
 		stats->obytes += stats->q_obytes[i];
 	}
 
 	return 0;
 }
 
-static void
+static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
 	int i;
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < internals->queue_cnt; i++) {
 		memset(&internals->rx_queues[i].stats, 0,
 					sizeof(struct rx_stats));
 		memset(&internals->tx_queues[i].stats, 0,
					sizeof(struct tx_stats));
 	}
+
+	return 0;
 }
 
 static void
@@ -474,11 +673,16 @@ remove_xdp_program(struct pmd_internals *internals)
 static void
 xdp_umem_destroy(struct xsk_umem_info *umem)
 {
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	rte_mempool_free(umem->mb_pool);
+	umem->mb_pool = NULL;
+#else
 	rte_memzone_free(umem->mz);
 	umem->mz = NULL;
 
 	rte_ring_free(umem->buf_ring);
 	umem->buf_ring = NULL;
+#endif
 
 	rte_free(umem);
 	umem = NULL;
@@ -494,13 +698,17 @@ eth_dev_close(struct rte_eth_dev *dev)
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < internals->queue_cnt; i++) {
 		rxq = &internals->rx_queues[i];
 		if (rxq->umem == NULL)
 			break;
 		xsk_socket__delete(rxq->xsk);
 		(void)xsk_umem__delete(rxq->umem->umem);
 		xdp_umem_destroy(rxq->umem);
+
+		/* free pkt_tx_queue */
+		rte_free(rxq->pair);
+		rte_free(rxq);
 	}
 
 	/*
@@ -524,8 +732,58 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+static inline uint64_t get_base_addr(struct rte_mempool *mp)
+{
+	struct rte_mempool_memhdr *memhdr;
+
+	memhdr = STAILQ_FIRST(&mp->mem_list);
+	return (uint64_t)memhdr->addr & ~(getpagesize() - 1);
+}
+
+static struct
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals __rte_unused,
+				  struct pkt_rx_queue *rxq)
+{
+	struct xsk_umem_info *umem;
+	int ret;
+	struct xsk_umem_config usr_config = {
+		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
+		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
+		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG};
+	void *base_addr = NULL;
+	struct rte_mempool *mb_pool = rxq->mb_pool;
+
+	usr_config.frame_size = rte_pktmbuf_data_room_size(mb_pool) +
+					ETH_AF_XDP_MBUF_OVERHEAD +
+					mb_pool->private_data_size;
+	usr_config.frame_headroom = ETH_AF_XDP_DATA_HEADROOM +
+					mb_pool->private_data_size;
+
+	umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id());
+	if (umem == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate umem info");
+		return NULL;
+	}
+
+	umem->mb_pool = mb_pool;
+	base_addr = (void *)get_base_addr(mb_pool);
+
+	ret = xsk_umem__create(&umem->umem, base_addr,
+			       mb_pool->populated_size * usr_config.frame_size,
+			       &umem->fq, &umem->cq,
+			       &usr_config);
+
+	if (ret) {
+		AF_XDP_LOG(ERR, "Failed to create umem");
+		goto err;
+	}
+	umem->buffer = base_addr;
+
+#else
 static struct
-xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
+				  struct pkt_rx_queue *rxq)
 {
 	struct xsk_umem_info *umem;
 	const struct rte_memzone *mz;
@@ -546,11 +804,11 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 	}
 
 	snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u",
-		       internals->if_name, internals->queue_idx);
+		       internals->if_name, rxq->xsk_queue_idx);
 	umem->buf_ring = rte_ring_create(ring_name,
 					 ETH_AF_XDP_NUM_BUFFERS,
 					 rte_socket_id(),
-					 0x0);
+					 RING_F_SP_ENQ | RING_F_SC_DEQ);
 	if (umem->buf_ring == NULL) {
 		AF_XDP_LOG(ERR, "Failed to create rte_ring\n");
 		goto err;
@@ -562,7 +820,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 			ETH_AF_XDP_DATA_HEADROOM));
 
 	snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u",
-		       internals->if_name, internals->queue_idx);
+		       internals->if_name, rxq->xsk_queue_idx);
 	mz = rte_memzone_reserve_aligned(mz_name,
 			ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
 			rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
@@ -583,6 +841,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 	}
 	umem->mz = mz;
 
+#endif
 	return umem;
 
 err:
@@ -597,27 +856,39 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	struct xsk_socket_config cfg;
 	struct pkt_tx_queue *txq = rxq->pair;
 	int ret = 0;
-	int reserve_size;
+	int reserve_size = ETH_AF_XDP_DFLT_NUM_DESCS / 2;
+	struct rte_mbuf *fq_bufs[reserve_size];
 
-	rxq->umem = xdp_umem_configure(internals);
+	rxq->umem = xdp_umem_configure(internals, rxq);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
+	txq->umem = rxq->umem;
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
 	cfg.libbpf_flags = 0;
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
+
+#if defined(XDP_USE_NEED_WAKEUP)
+	cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
+#endif
+
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
-			internals->queue_idx, rxq->umem->umem, &rxq->rx,
+			rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
 			&txq->tx, &cfg);
 	if (ret) {
 		AF_XDP_LOG(ERR, "Failed to create xsk socket.\n");
 		goto err;
 	}
 
-	reserve_size = ETH_AF_XDP_DFLT_NUM_DESCS / 2;
-	ret = reserve_fill_queue(rxq->umem, reserve_size);
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	if (rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size)) {
+		AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
+		goto err;
+	}
+#endif
+	ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs);
 	if (ret) {
 		xsk_socket__delete(rxq->xsk);
 		AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n");
@@ -632,20 +903,6 @@ err:
 	return ret;
 }
 
-static void
-queue_reset(struct pmd_internals *internals, uint16_t queue_idx)
-{
-	struct pkt_rx_queue *rxq = &internals->rx_queues[queue_idx];
-	struct pkt_tx_queue *txq = rxq->pair;
-
-	memset(rxq, 0, sizeof(*rxq));
-	memset(txq, 0, sizeof(*txq));
-	rxq->pair = txq;
-	txq->pair = rxq;
-	rxq->queue_idx = queue_idx;
-	txq->queue_idx = queue_idx;
-}
-
 static int
 eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   uint16_t rx_queue_id,
@@ -655,12 +912,16 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
-	uint32_t buf_size, data_size;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
 	rxq = &internals->rx_queues[rx_queue_id];
-	queue_reset(internals, rx_queue_id);
+
+	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
+		   rx_queue_id, rxq->xsk_queue_idx);
+
+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
+	uint32_t buf_size, data_size;
 
 	/* Now get the space available for data in the mbuf */
 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
@@ -673,6 +934,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		ret = -ENOMEM;
 		goto err;
 	}
+#endif
 
 	rxq->mb_pool = mb_pool;
 
@@ -682,13 +944,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		goto err;
 	}
 
-	rxq->umem->pmd_zc = internals->pmd_zc;
+	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
+	rxq->fds[0].events = POLLIN;
 
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
 err:
-	queue_reset(internals, rx_queue_id);
 	return ret;
 }
 
@@ -727,41 +989,47 @@ eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 	return (ret < 0) ? -errno : 0;
 }
 
-static void
+static int
 eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
 {
 	struct ifreq ifr;
+	int ret = 0;
 	int s;
 
 	s = socket(PF_INET, SOCK_DGRAM, 0);
 	if (s < 0)
-		return;
+		return -errno;
 
 	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
-	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0)
+	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) {
+		ret = -errno;
 		goto out;
+	}
 	ifr.ifr_flags &= mask;
 	ifr.ifr_flags |= flags;
-	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0)
+	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
+		ret = -errno;
 		goto out;
+	}
 out:
 	close(s);
+	return ret;
 }
 
-static void
+static int
 eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
 
-	eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
+	return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
 }
 
-static void
+static int
 eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
 
-	eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
+	return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
 }
 
 static const struct eth_dev_ops ops = {
@@ -818,8 +1086,49 @@ parse_name_arg(const char *key __rte_unused,
 }
 
 static int
-parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
-		 int *pmd_zc)
+xdp_get_channels_info(const char *if_name, int *max_queues,
+		      int *combined_queues)
+{
+	struct ethtool_channels channels;
+	struct ifreq ifr;
+	int fd, ret;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -1;
+
+	channels.cmd = ETHTOOL_GCHANNELS;
+	ifr.ifr_data = (void *)&channels;
+	strncpy(ifr.ifr_name, if_name, IFNAMSIZ);
+	ret = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (ret) {
+		if (errno == EOPNOTSUPP) {
+			ret = 0;
+		} else {
+			ret = -errno;
+			goto out;
+		}
+	}
+
+	if (channels.max_combined == 0 || errno == EOPNOTSUPP) {
+		/* If the device says it has no channels, then all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		*max_queues = 1;
+		*combined_queues = 1;
+	} else {
+		*max_queues = channels.max_combined;
+		*combined_queues = channels.combined_count;
+	}
+
+ out:
+	close(fd);
+	return ret;
+}
+
+static int
+parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
+		 int *queue_cnt)
 {
 	int ret;
 
@@ -828,15 +1137,17 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
 	if (ret < 0)
 		goto free_kvlist;
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IDX_ARG,
-				 &parse_integer_arg, queue_idx);
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG,
+				 &parse_integer_arg, start_queue);
 	if (ret < 0)
 		goto free_kvlist;
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
-				 &parse_integer_arg, pmd_zc);
-	if (ret < 0)
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG,
+				 &parse_integer_arg, queue_cnt);
+	if (ret < 0 || *queue_cnt <= 0) {
+		ret = -EINVAL;
 		goto free_kvlist;
+	}
 
 free_kvlist:
 	rte_kvargs_free(kvlist);
@@ -874,8 +1185,8 @@ error:
 }
 
 static struct rte_eth_dev *
-init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
-		int pmd_zc)
+init_internals(struct rte_vdev_device *dev, const char *if_name,
+		int start_queue_idx, int queue_cnt)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -888,23 +1199,53 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
 	if (internals == NULL)
 		return NULL;
 
-	internals->queue_idx = queue_idx;
-	internals->pmd_zc = pmd_zc;
+	internals->start_queue_idx = start_queue_idx;
+	internals->queue_cnt = queue_cnt;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
+				  &internals->combined_queue_cnt)) {
+		AF_XDP_LOG(ERR, "Failed to get channel info of interface: %s\n",
+			if_name);
+		goto err_free_internals;
+	}
+
+	if (queue_cnt > internals->combined_queue_cnt) {
+		AF_XDP_LOG(ERR, "Specified queue count %d is larger than combined queue count %d.\n",
+			queue_cnt, internals->combined_queue_cnt);
+		goto err_free_internals;
+	}
+
+	internals->rx_queues = rte_zmalloc_socket(NULL,
+					sizeof(struct pkt_rx_queue) * queue_cnt,
+					0, numa_node);
+	if (internals->rx_queues == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate memory for rx queues.\n");
+		goto err_free_internals;
+	}
+
+	internals->tx_queues = rte_zmalloc_socket(NULL,
+					sizeof(struct pkt_tx_queue) * queue_cnt,
+					0, numa_node);
+	if (internals->tx_queues == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate memory for tx queues.\n");
+		goto err_free_rx;
+	}
+	for (i = 0; i < queue_cnt; i++) {
 		internals->tx_queues[i].pair = &internals->rx_queues[i];
 		internals->rx_queues[i].pair = &internals->tx_queues[i];
+		internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
+		internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
 	}
 
 	ret = get_iface_info(if_name, &internals->eth_addr,
 			     &internals->if_index);
 	if (ret)
-		goto err;
+		goto err_free_tx;
 
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err;
+		goto err_free_tx;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -915,12 +1256,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
 	/* Let rte_eth_dev_close() release the port resources. */
 	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 
-	if (internals->pmd_zc)
-		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+#endif
 
 	return eth_dev;
 
-err:
+err_free_tx:
+	rte_free(internals->tx_queues);
+err_free_rx:
+	rte_free(internals->rx_queues);
+err_free_internals:
 	rte_free(internals);
 	return NULL;
 }
 
@@ -930,10 +1276,10 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
 	struct rte_kvargs *kvlist;
 	char if_name[IFNAMSIZ] = {'\0'};
-	int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
+	int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX;
+	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
-	int pmd_zc = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -960,7 +1306,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	if (dev->device.numa_node == SOCKET_ID_ANY)
 		dev->device.numa_node = rte_socket_id();
 
-	if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
+	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
+			     &xsk_queue_cnt) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -970,7 +1317,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -EINVAL;
 	}
 
-	eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
+	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
+					xsk_queue_cnt);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1012,8 +1360,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
 RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
-			      "queue=<int> "
-			      "pmd_zero_copy=<0|1>");
+			      "start_queue=<int> "
+			      "queue_count=<int> ");
 
 RTE_INIT(af_xdp_init_log)
 {