diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 007a1c6b48..aa716f3195 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
+#include <poll.h>
 #include <netinet/in.h>
 #include <net/if.h>
 #include <sys/socket.h>
@@ -12,6 +13,8 @@
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
 #include <linux/if_link.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
 #include "af_xdp_deps.h"
 #include <bpf/xsk.h>
@@ -53,16 +56,16 @@ static int af_xdp_logtype;
 	rte_log(RTE_LOG_ ## level, af_xdp_logtype,	\
 		"%s(): " fmt, __func__, ##args)
 
-#define ETH_AF_XDP_FRAME_SIZE		XSK_UMEM__DEFAULT_FRAME_SIZE
+#define ETH_AF_XDP_FRAME_SIZE		2048
 #define ETH_AF_XDP_NUM_BUFFERS		4096
 #define ETH_AF_XDP_DATA_HEADROOM	0
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
-#define ETH_AF_XDP_DFLT_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
 
 #define ETH_AF_XDP_RX_BATCH_SIZE	32
 #define ETH_AF_XDP_TX_BATCH_SIZE	32
 
-#define ETH_AF_XDP_MAX_QUEUE_PAIRS	16
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
@@ -70,6 +73,7 @@ struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
+	int pmd_zc;
 };
 
 struct rx_stats {
@@ -87,12 +91,12 @@ struct pkt_rx_queue {
 	struct rx_stats stats;
 
 	struct pkt_tx_queue *pair;
-	uint16_t queue_idx;
+	struct pollfd fds[1];
+	int xsk_queue_idx;
 };
 
 struct tx_stats {
 	uint64_t tx_pkts;
-	uint64_t err_pkts;
 	uint64_t tx_bytes;
 };
 
@@ -102,27 +106,34 @@ struct pkt_tx_queue {
 	struct tx_stats stats;
 
 	struct pkt_rx_queue *pair;
-	uint16_t queue_idx;
+	int xsk_queue_idx;
 };
 
 struct pmd_internals {
 	int if_index;
 	char if_name[IFNAMSIZ];
-	uint16_t queue_idx;
-	struct ether_addr eth_addr;
-	struct xsk_umem_info *umem;
-	struct rte_mempool *mb_pool_share;
+	int start_queue_idx;
+	int queue_cnt;
+	int max_queue_cnt;
+	int combined_queue_cnt;
+
+	int pmd_zc;
+	struct rte_ether_addr eth_addr;
 
-	struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
-	struct pkt_tx_queue tx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
+	struct pkt_rx_queue *rx_queues;
+	struct pkt_tx_queue *tx_queues;
 };
 
 #define ETH_AF_XDP_IFACE_ARG		"iface"
-#define ETH_AF_XDP_QUEUE_IDX_ARG	"queue"
+#define ETH_AF_XDP_START_QUEUE_ARG	"start_queue"
+#define ETH_AF_XDP_QUEUE_COUNT_ARG	"queue_count"
+#define ETH_AF_XDP_PMD_ZC_ARG		"pmd_zero_copy"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
-	ETH_AF_XDP_QUEUE_IDX_ARG,
+	ETH_AF_XDP_START_QUEUE_ARG,
+	ETH_AF_XDP_QUEUE_COUNT_ARG,
+	ETH_AF_XDP_PMD_ZC_ARG,
 	NULL
 };
 
@@ -134,34 +145,47 @@ static const struct rte_eth_link pmd_link = {
 };
 
 static inline int
-reserve_fill_queue(struct xsk_umem_info *umem, int reserve_size)
+reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 {
 	struct xsk_ring_prod *fq = &umem->fq;
+	void *addrs[reserve_size];
 	uint32_t idx;
-	int i, ret;
+	uint16_t i;
+
+	if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL)
+		    != reserve_size) {
+		AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
+		return -1;
+	}
 
-	ret = xsk_ring_prod__reserve(fq, reserve_size, &idx);
-	if (unlikely(!ret)) {
-		AF_XDP_LOG(ERR, "Failed to reserve enough fq descs.\n");
-		return ret;
+	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
+		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
descs.\n"); + rte_ring_enqueue_bulk(umem->buf_ring, addrs, + reserve_size, NULL); + return -1; } for (i = 0; i < reserve_size; i++) { __u64 *fq_addr; - void *addr = NULL; - if (rte_ring_dequeue(umem->buf_ring, &addr)) { - i--; - break; - } + fq_addr = xsk_ring_prod__fill_addr(fq, idx++); - *fq_addr = (uint64_t)addr; + *fq_addr = (uint64_t)addrs[i]; } - xsk_ring_prod__submit(fq, i); + xsk_ring_prod__submit(fq, reserve_size); return 0; } +static void +umem_buf_release_to_fq(void *addr, void *opaque) +{ + struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque; + uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64; + + rte_ring_enqueue(umem->buf_ring, (void *)umem_addr); +} + static uint16_t eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { @@ -171,42 +195,58 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) struct xsk_ring_prod *fq = &umem->fq; uint32_t idx_rx = 0; uint32_t free_thresh = fq->size >> 1; - struct rte_mbuf *mbufs[ETH_AF_XDP_TX_BATCH_SIZE]; + int pmd_zc = umem->pmd_zc; + struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE]; unsigned long dropped = 0; unsigned long rx_bytes = 0; - uint16_t count = 0; int rcvd, i; - nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE); + nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE); - rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx); - if (rcvd == 0) + if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0)) return 0; + rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx); + if (rcvd == 0) { +#if defined(XDP_USE_NEED_WAKEUP) + if (xsk_ring_prod__needs_wakeup(fq)) + (void)poll(rxq->fds, 1, 1000); +#endif + + goto out; + } + if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh) (void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE); - if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, rcvd) != 0)) - return 0; - for (i = 0; i < rcvd; i++) { const struct xdp_desc *desc; uint64_t addr; uint32_t len; void *pkt; + uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE; + struct rte_mbuf_ext_shared_info *shinfo; desc = xsk_ring_cons__rx_desc(rx, idx_rx++); addr = desc->addr; len = desc->len; pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr); - rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len); + if (pmd_zc) { + shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt, + &buf_len, umem_buf_release_to_fq, umem); + + rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len, + shinfo); + } else { + rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), + pkt, len); + rte_ring_enqueue(umem->buf_ring, (void *)addr); + } rte_pktmbuf_pkt_len(mbufs[i]) = len; rte_pktmbuf_data_len(mbufs[i]) = len; rx_bytes += len; - bufs[count++] = mbufs[i]; - - rte_ring_enqueue(umem->buf_ring, (void *)addr); + bufs[i] = mbufs[i]; } xsk_ring_cons__release(rx, rcvd); @@ -215,7 +255,12 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) rxq->stats.rx_pkts += (rcvd - dropped); rxq->stats.rx_bytes += rx_bytes; - return count; +out: + if (rcvd != nb_pkts) + rte_mempool_put_bulk(rxq->mb_pool, (void **)&mbufs[rcvd], + nb_pkts - rcvd); + + return rcvd; } static void @@ -241,28 +286,40 @@ kick_tx(struct pkt_tx_queue *txq) { struct xsk_umem_info *umem = txq->pair->umem; - while (send(xsk_socket__fd(txq->pair->xsk), NULL, - 0, MSG_DONTWAIT) < 0) { - /* some thing unexpected */ - if (errno != EBUSY && errno != EAGAIN && errno != EINTR) - break; - - /* pull from completion queue to leave more space */ - if (errno == EAGAIN) - pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); - } +#if defined(XDP_USE_NEED_WAKEUP) + if 
+#endif
+		while (send(xsk_socket__fd(txq->pair->xsk), NULL,
+			    0, MSG_DONTWAIT) < 0) {
+			/* some thing unexpected */
+			if (errno != EBUSY && errno != EAGAIN && errno != EINTR)
+				break;
+
+			/* pull from completion queue to leave more space */
+			if (errno == EAGAIN)
+				pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
+		}
 	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
 }
 
+static inline bool
+in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
+{
+	uint64_t mz_base_addr = umem->mz->addr_64;
+
+	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
+}
+
 static uint16_t
 eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
-	int i, valid = 0;
+	int i;
 	uint32_t idx_tx;
 
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
@@ -276,41 +333,48 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) {
 		kick_tx(txq);
+		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL);
 		return 0;
 	}
 
 	for (i = 0; i < nb_pkts; i++) {
 		struct xdp_desc *desc;
 		void *pkt;
-		uint32_t buf_len = ETH_AF_XDP_FRAME_SIZE
-					- ETH_AF_XDP_DATA_HEADROOM;
+
 		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
 		mbuf = bufs[i];
-		if (mbuf->pkt_len <= buf_len) {
-			desc->addr = (uint64_t)addrs[valid];
-			desc->len = mbuf->pkt_len;
+		desc->len = mbuf->pkt_len;
+
+		/*
+		 * We need to make sure the external mbuf address is within
+		 * current port's umem memzone range
+		 */
+		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
+				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
+			desc->addr = (uint64_t)mbuf->buf_addr -
+					umem->mz->addr_64;
+			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
+					(uint64_t)addrs[i]);
+		} else {
+			desc->addr = (uint64_t)addrs[i];
 			pkt = xsk_umem__get_data(umem->mz->addr,
-						 desc->addr);
+					desc->addr);
 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-				   desc->len);
-			valid++;
-			tx_bytes += mbuf->pkt_len;
+					desc->len);
 		}
-		rte_pktmbuf_free(mbuf);
+		tx_bytes += mbuf->pkt_len;
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
 
 	kick_tx(txq);
 
-	if (valid < nb_pkts)
-		rte_ring_enqueue_bulk(umem->buf_ring, &addrs[valid],
-				      nb_pkts - valid, NULL);
-
-	txq->stats.err_pkts += nb_pkts - valid;
-	txq->stats.tx_pkts += valid;
+	txq->stats.tx_pkts += nb_pkts;
 	txq->stats.tx_bytes += tx_bytes;
 
+	for (i = 0; i < nb_pkts; i++)
+		rte_pktmbuf_free(bufs[i]);
+
 	return nb_pkts;
 }
 
@@ -339,7 +403,7 @@ eth_dev_configure(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static void
+static int
 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
@@ -347,13 +411,18 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->if_index = internals->if_index;
 	dev_info->max_mac_addrs = 1;
 	dev_info->max_rx_pktlen = ETH_FRAME_LEN;
-	dev_info->max_rx_queues = 1;
-	dev_info->max_tx_queues = 1;
+	dev_info->max_rx_queues = internals->queue_cnt;
+	dev_info->max_tx_queues = internals->queue_cnt;
+
+	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
+	dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
 
 	dev_info->default_rxportconf.nb_queues = 1;
 	dev_info->default_txportconf.nb_queues = 1;
 	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
 	dev_info->default_txportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
+
+	return 0;
 }
 
 static int
@@ -362,21 +431,23 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	struct pmd_internals *internals = dev->data->dev_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
+	struct pkt_tx_queue *txq;
 	socklen_t optlen;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
 		rxq = &internals->rx_queues[i];
-		stats->q_ipackets[i] = internals->rx_queues[i].stats.rx_pkts;
-		stats->q_ibytes[i] = internals->rx_queues[i].stats.rx_bytes;
+		txq = rxq->pair;
+		stats->q_ipackets[i] = rxq->stats.rx_pkts;
+		stats->q_ibytes[i] = rxq->stats.rx_bytes;
 
-		stats->q_opackets[i] = internals->tx_queues[i].stats.tx_pkts;
-		stats->q_obytes[i] = internals->tx_queues[i].stats.tx_bytes;
+		stats->q_opackets[i] = txq->stats.tx_pkts;
+		stats->q_obytes[i] = txq->stats.tx_bytes;
 
 		stats->ipackets += stats->q_ipackets[i];
 		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += internals->rx_queues[i].stats.rx_dropped;
+		stats->imissed += rxq->stats.rx_dropped;
 		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
 				XDP_STATISTICS, &xdp_stats, &optlen);
 		if (ret != 0) {
@@ -386,7 +457,6 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->imissed += xdp_stats.rx_dropped;
 
 		stats->opackets += stats->q_opackets[i];
-		stats->oerrors += internals->tx_queues[i].stats.err_pkts;
 		stats->obytes += stats->q_obytes[i];
 	}
 
@@ -399,7 +469,7 @@ eth_stats_reset(struct rte_eth_dev *dev)
 	struct pmd_internals *internals = dev->data->dev_private;
 	int i;
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < internals->queue_cnt; i++) {
 		memset(&internals->rx_queues[i].stats, 0,
 		       sizeof(struct rx_stats));
 		memset(&internals->tx_queues[i].stats, 0,
@@ -421,6 +491,19 @@ remove_xdp_program(struct pmd_internals *internals)
 		      XDP_FLAGS_UPDATE_IF_NOEXIST);
 }
 
+static void
+xdp_umem_destroy(struct xsk_umem_info *umem)
+{
+	rte_memzone_free(umem->mz);
+	umem->mz = NULL;
+
+	rte_ring_free(umem->buf_ring);
+	umem->buf_ring = NULL;
+
+	rte_free(umem);
+	umem = NULL;
+}
+
 static void
 eth_dev_close(struct rte_eth_dev *dev)
 {
@@ -431,14 +514,25 @@ eth_dev_close(struct rte_eth_dev *dev)
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < internals->queue_cnt; i++) {
 		rxq = &internals->rx_queues[i];
 		if (rxq->umem == NULL)
 			break;
 		xsk_socket__delete(rxq->xsk);
+		(void)xsk_umem__delete(rxq->umem->umem);
+		xdp_umem_destroy(rxq->umem);
+
+		/* free pkt_tx_queue */
+		rte_free(rxq->pair);
+		rte_free(rxq);
 	}
 
-	(void)xsk_umem__delete(internals->umem->umem);
+	/*
+	 * MAC is not allocated dynamically, setting it to NULL would prevent
+	 * from releasing it in rte_eth_dev_release_port.
+	 */
+	dev->data->mac_addrs = NULL;
+
 	remove_xdp_program(internals);
 }
 
@@ -454,21 +548,9 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
-static void
-xdp_umem_destroy(struct xsk_umem_info *umem)
-{
-	rte_memzone_free(umem->mz);
-	umem->mz = NULL;
-
-	rte_ring_free(umem->buf_ring);
-	umem->buf_ring = NULL;
-
-	rte_free(umem);
-	umem = NULL;
-}
-
 static struct
-xsk_umem_info *xdp_umem_configure(void)
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
+				  struct pkt_rx_queue *rxq)
 {
 	struct xsk_umem_info *umem;
 	const struct rte_memzone *mz;
@@ -477,6 +559,8 @@ xsk_umem_info *xdp_umem_configure(void)
 		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
 		.frame_size = ETH_AF_XDP_FRAME_SIZE,
 		.frame_headroom = ETH_AF_XDP_DATA_HEADROOM };
+	char ring_name[RTE_RING_NAMESIZE];
+	char mz_name[RTE_MEMZONE_NAMESIZE];
 	int ret;
 	uint64_t i;
 
@@ -486,7 +570,9 @@ xsk_umem_info *xdp_umem_configure(void)
 		return NULL;
 	}
 
-	umem->buf_ring = rte_ring_create("af_xdp_ring",
+	snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u",
+		       internals->if_name, rxq->xsk_queue_idx);
+	umem->buf_ring = rte_ring_create(ring_name,
 					 ETH_AF_XDP_NUM_BUFFERS,
 					 rte_socket_id(),
 					 0x0);
@@ -500,7 +586,9 @@ xsk_umem_info *xdp_umem_configure(void)
 			 (void *)(i * ETH_AF_XDP_FRAME_SIZE +
 				  ETH_AF_XDP_DATA_HEADROOM));
 
-	mz = rte_memzone_reserve_aligned("af_xdp uemem",
+	snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u",
+		       internals->if_name, rxq->xsk_queue_idx);
+	mz = rte_memzone_reserve_aligned(mz_name,
 			ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
 			rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
 			getpagesize());
@@ -536,7 +624,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	int ret = 0;
 	int reserve_size;
 
-	rxq->umem = xdp_umem_configure();
+	rxq->umem = xdp_umem_configure(internals, rxq);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
 
@@ -545,8 +633,13 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.libbpf_flags = 0;
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
+
+#if defined(XDP_USE_NEED_WAKEUP)
+	cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
+#endif
+
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
-			internals->queue_idx, rxq->umem->umem, &rxq->rx,
+			rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
 			&txq->tx, &cfg);
 	if (ret) {
 		AF_XDP_LOG(ERR, "Failed to create xsk socket.\n");
@@ -569,20 +662,6 @@ err:
 	return ret;
 }
 
-static void
-queue_reset(struct pmd_internals *internals, uint16_t queue_idx)
-{
-	struct pkt_rx_queue *rxq = &internals->rx_queues[queue_idx];
-	struct pkt_tx_queue *txq = rxq->pair;
-
-	memset(rxq, 0, sizeof(*rxq));
-	memset(txq, 0, sizeof(*txq));
-	rxq->pair = txq;
-	txq->pair = rxq;
-	rxq->queue_idx = queue_idx;
-	txq->queue_idx = queue_idx;
-}
-
 static int
 eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   uint16_t rx_queue_id,
@@ -597,8 +676,9 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	int ret;
 
 	rxq = &internals->rx_queues[rx_queue_id];
-	queue_reset(internals, rx_queue_id);
 
+	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
+		   rx_queue_id, rxq->xsk_queue_idx);
 	/* Now get the space available for data in the mbuf */
 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
 		RTE_PKTMBUF_HEADROOM;
@@ -619,13 +699,15 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		goto err;
 	}
 
-	internals->umem = rxq->umem;
+	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
+	rxq->fds[0].events = POLLIN;
+
+	rxq->umem->pmd_zc = internals->pmd_zc;
 
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
 err:
-	queue_reset(internals, rx_queue_id);
 	return ret;
 }
 
@@ -755,9 +837,49 @@ parse_name_arg(const char *key __rte_unused,
 }
 
 static int
-parse_parameters(struct rte_kvargs *kvlist,
-		 char *if_name,
-		 int *queue_idx)
+xdp_get_channels_info(const char *if_name, int *max_queues,
+		      int *combined_queues)
+{
+	struct ethtool_channels channels;
+	struct ifreq ifr;
+	int fd, ret;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -1;
+
+	channels.cmd = ETHTOOL_GCHANNELS;
+	ifr.ifr_data = (void *)&channels;
+	strncpy(ifr.ifr_name, if_name, IFNAMSIZ);
+	ret = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (ret) {
+		if (errno == EOPNOTSUPP) {
+			ret = 0;
+		} else {
+			ret = -errno;
+			goto out;
+		}
+	}
+
+	if (channels.max_combined == 0 || errno == EOPNOTSUPP) {
+		/* If the device says it has no channels, then all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		*max_queues = 1;
+		*combined_queues = 1;
+	} else {
+		*max_queues = channels.max_combined;
+		*combined_queues = channels.combined_count;
+	}
+
+ out:
+	close(fd);
+	return ret;
+}
+
+static int
+parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
+		 int *queue_cnt, int *pmd_zc)
 {
 	int ret;
 
@@ -766,8 +888,20 @@ parse_parameters(struct rte_kvargs *kvlist,
 	if (ret < 0)
 		goto free_kvlist;
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IDX_ARG,
-				 &parse_integer_arg, queue_idx);
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG,
+				 &parse_integer_arg, start_queue);
+	if (ret < 0)
+		goto free_kvlist;
+
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG,
+				 &parse_integer_arg, queue_cnt);
+	if (ret < 0 || *queue_cnt <= 0) {
+		ret = -EINVAL;
+		goto free_kvlist;
+	}
+
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
+				 &parse_integer_arg, pmd_zc);
 	if (ret < 0)
 		goto free_kvlist;
 
@@ -778,7 +912,7 @@ free_kvlist:
 
 static int
 get_iface_info(const char *if_name,
-	       struct ether_addr *eth_addr,
+	       struct rte_ether_addr *eth_addr,
 	       int *if_index)
 {
 	struct ifreq ifr;
@@ -796,7 +930,7 @@ get_iface_info(const char *if_name,
 	if (ioctl(sock, SIOCGIFHWADDR, &ifr))
 		goto error;
 
-	rte_memcpy(eth_addr, ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+	rte_memcpy(eth_addr, ifr.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
 
 	close(sock);
 	return 0;
@@ -807,9 +941,8 @@ error:
 }
 
 static struct rte_eth_dev *
-init_internals(struct rte_vdev_device *dev,
-	       const char *if_name,
-	       int queue_idx)
+init_internals(struct rte_vdev_device *dev, const char *if_name,
+	       int start_queue_idx, int queue_cnt, int pmd_zc)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -822,22 +955,54 @@ init_internals(struct rte_vdev_device *dev,
 	if (internals == NULL)
 		return NULL;
 
-	internals->queue_idx = queue_idx;
+	internals->start_queue_idx = start_queue_idx;
+	internals->queue_cnt = queue_cnt;
+	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
+				  &internals->combined_queue_cnt)) {
+		AF_XDP_LOG(ERR, "Failed to get channel info of interface: %s\n",
+				if_name);
+		goto err_free_internals;
+	}
+
+	if (queue_cnt > internals->combined_queue_cnt) {
+		AF_XDP_LOG(ERR, "Specified queue count %d is larger than combined queue count %d.\n",
+				queue_cnt, internals->combined_queue_cnt);
+		goto err_free_internals;
+	}
+
+	internals->rx_queues = rte_zmalloc_socket(NULL,
+					sizeof(struct pkt_rx_queue) * queue_cnt,
+					0, numa_node);
+	if (internals->rx_queues == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate memory for rx queues.\n");
queues.\n"); + goto err_free_internals; + } + + internals->tx_queues = rte_zmalloc_socket(NULL, + sizeof(struct pkt_tx_queue) * queue_cnt, + 0, numa_node); + if (internals->tx_queues == NULL) { + AF_XDP_LOG(ERR, "Failed to allocate memory for tx queues.\n"); + goto err_free_rx; + } + for (i = 0; i < queue_cnt; i++) { internals->tx_queues[i].pair = &internals->rx_queues[i]; internals->rx_queues[i].pair = &internals->tx_queues[i]; + internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i; + internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i; } ret = get_iface_info(if_name, &internals->eth_addr, &internals->if_index); if (ret) - goto err; + goto err_free_tx; eth_dev = rte_eth_vdev_allocate(dev, 0); if (eth_dev == NULL) - goto err; + goto err_free_tx; eth_dev->data->dev_private = internals; eth_dev->data->dev_link = pmd_link; @@ -845,10 +1010,19 @@ init_internals(struct rte_vdev_device *dev, eth_dev->dev_ops = &ops; eth_dev->rx_pkt_burst = eth_af_xdp_rx; eth_dev->tx_pkt_burst = eth_af_xdp_tx; + /* Let rte_eth_dev_close() release the port resources. */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + if (internals->pmd_zc) + AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); return eth_dev; -err: +err_free_tx: + rte_free(internals->tx_queues); +err_free_rx: + rte_free(internals->rx_queues); +err_free_internals: rte_free(internals); return NULL; } @@ -858,9 +1032,11 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) { struct rte_kvargs *kvlist; char if_name[IFNAMSIZ] = {'\0'}; - int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX; + int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX; + int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT; struct rte_eth_dev *eth_dev = NULL; const char *name; + int pmd_zc = 0; AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", rte_vdev_device_name(dev)); @@ -887,7 +1063,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) if (dev->device.numa_node == SOCKET_ID_ANY) dev->device.numa_node = rte_socket_id(); - if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) { + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &pmd_zc) < 0) { AF_XDP_LOG(ERR, "Invalid kvargs value\n"); return -EINVAL; } @@ -897,7 +1074,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) return -EINVAL; } - eth_dev = init_internals(dev, if_name, xsk_queue_idx); + eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, + xsk_queue_cnt, pmd_zc); if (eth_dev == NULL) { AF_XDP_LOG(ERR, "Failed to init internals\n"); return -1; @@ -912,7 +1090,6 @@ static int rte_pmd_af_xdp_remove(struct rte_vdev_device *dev) { struct rte_eth_dev *eth_dev = NULL; - struct pmd_internals *internals; AF_XDP_LOG(INFO, "Removing AF_XDP ethdev on numa socket %u\n", rte_socket_id()); @@ -923,14 +1100,9 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev) /* find the ethdev entry */ eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); if (eth_dev == NULL) - return -1; - - internals = eth_dev->data->dev_private; - - rte_ring_free(internals->umem->buf_ring); - rte_memzone_free(internals->umem->mz); - rte_free(internals->umem); + return 0; + eth_dev_close(eth_dev); rte_eth_dev_release_port(eth_dev); @@ -945,7 +1117,9 @@ static struct rte_vdev_driver pmd_af_xdp_drv = { RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv); RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, "iface= " - "queue= "); + "start_queue= " + "queue_count= " + "pmd_zero_copy=<0|1>"); RTE_INIT(af_xdp_init_log) {