X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fnetvsc%2Fhn_rxtx.c;h=015662fdb496b9a6d1b24fb649b0b4e536f08148;hb=b9d60b5434e9df46f53fc1e3aa4b065f261adb83;hp=cc8a534b5ce8197eb2677667568d5f65b8f5a1d2;hpb=530af95a7849f9dc8d8c4631f99396da5a29f48e;p=dpdk.git diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c index cc8a534b5c..015662fdb4 100644 --- a/drivers/net/netvsc/hn_rxtx.c +++ b/drivers/net/netvsc/hn_rxtx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,9 +40,6 @@ (sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis)) #define HN_TXD_CACHE_SIZE 32 /* per cpu tx_descriptor pool cache */ -#define HN_TXCOPY_THRESHOLD 512 - -#define HN_RXCOPY_THRESHOLD 256 #define HN_RXQ_EVENT_DEFAULT 2048 struct hn_rxinfo { @@ -81,7 +81,7 @@ struct hn_txdesc { struct rte_mbuf *m; uint16_t queue_id; - uint16_t chim_index; + uint32_t chim_index; uint32_t chim_size; uint32_t data_size; uint32_t packets; @@ -96,17 +96,19 @@ struct hn_txdesc { RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) +#define HN_RNDIS_PKT_ALIGNED RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE) + /* Minimum space required for a packet */ #define HN_PKTSIZE_MIN(align) \ - RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align) + RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align) -#define DEFAULT_TX_FREE_THRESH 32U +#define DEFAULT_TX_FREE_THRESH 32 static void hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m) { uint32_t s = m->pkt_len; - const struct ether_addr *ea; + const struct rte_ether_addr *ea; if (s == 64) { stats->size_bins[1]++; @@ -121,13 +123,13 @@ hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m) stats->size_bins[0]++; else if (s < 1519) stats->size_bins[6]++; - else if (s >= 1519) + else stats->size_bins[7]++; } - ea = rte_pktmbuf_mtod(m, const struct ether_addr *); - if (is_multicast_ether_addr(ea)) { - if (is_broadcast_ether_addr(ea)) + ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *); + if (rte_is_multicast_ether_addr(ea)) { + if (rte_is_broadcast_ether_addr(ea)) stats->broadcast++; else stats->multicast++; @@ -148,55 +150,82 @@ hn_rndis_pktmsg_offset(uint32_t ofs) static void hn_txd_init(struct rte_mempool *mp __rte_unused, void *opaque, void *obj, unsigned int idx) { + struct hn_tx_queue *txq = opaque; struct hn_txdesc *txd = obj; - struct rte_eth_dev *dev = opaque; - struct rndis_packet_msg *pkt; memset(txd, 0, sizeof(*txd)); - txd->chim_index = idx; - pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN, - rte_align32pow2(HN_RNDIS_PKT_LEN), - dev->device->numa_node); - if (!pkt) - rte_exit(EXIT_FAILURE, "can not allocate RNDIS header"); - - txd->rndis_pkt = pkt; + txd->queue_id = txq->queue_id; + txd->chim_index = NVS_CHIM_IDX_INVALID; + txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis + + idx * HN_RNDIS_PKT_ALIGNED); } -/* - * Unlike Linux and FreeBSD, this driver uses a mempool - * to limit outstanding transmits and reserve buffers - */ int -hn_tx_pool_init(struct rte_eth_dev *dev) +hn_chim_init(struct rte_eth_dev *dev) { struct hn_data *hv = dev->data->dev_private; - char name[RTE_MEMPOOL_NAMESIZE]; - struct rte_mempool *mp; + uint32_t i, chim_bmp_size; + + rte_spinlock_init(&hv->chim_lock); + chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt); + hv->chim_bmem 
= rte_zmalloc("hn_chim_bitmap", chim_bmp_size, + RTE_CACHE_LINE_SIZE); + if (hv->chim_bmem == NULL) { + PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u", + chim_bmp_size); + return -1; + } - snprintf(name, sizeof(name), - "hn_txd_%u", dev->data->port_id); - - PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d", - name, hv->chim_cnt, sizeof(struct hn_txdesc), - dev->device->numa_node); - - mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc), - HN_TXD_CACHE_SIZE, 0, - NULL, NULL, - hn_txd_init, dev, - dev->device->numa_node, 0); - if (!mp) { - PMD_DRV_LOG(ERR, - "mempool %s create failed: %d", name, rte_errno); - return -rte_errno; + hv->chim_bmap = rte_bitmap_init(hv->chim_cnt, + hv->chim_bmem, chim_bmp_size); + if (hv->chim_bmap == NULL) { + PMD_INIT_LOG(ERR, "failed to init chim bitmap"); + return -1; } - hv->tx_pool = mp; + for (i = 0; i < hv->chim_cnt; i++) + rte_bitmap_set(hv->chim_bmap, i); + return 0; } +void +hn_chim_uninit(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + rte_bitmap_free(hv->chim_bmap); + rte_free(hv->chim_bmem); + hv->chim_bmem = NULL; +} + +static uint32_t hn_chim_alloc(struct hn_data *hv) +{ + uint32_t index = NVS_CHIM_IDX_INVALID; + uint64_t slab = 0; + + rte_spinlock_lock(&hv->chim_lock); + if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) { + index += rte_bsf64(slab); + rte_bitmap_clear(hv->chim_bmap, index); + } + rte_spinlock_unlock(&hv->chim_lock); + + return index; +} + +static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx) +{ + if (chim_idx >= hv->chim_cnt) { + PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx); + } else { + rte_spinlock_lock(&hv->chim_lock); + rte_bitmap_set(hv->chim_bmap, chim_idx); + rte_spinlock_unlock(&hv->chim_lock); + } +} + static void hn_reset_txagg(struct hn_tx_queue *txq) { txq->agg_szleft = txq->agg_szmax; @@ -207,17 +236,33 @@ static void hn_reset_txagg(struct hn_tx_queue *txq) int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, - uint16_t queue_idx, uint16_t nb_desc __rte_unused, + uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf) { struct hn_data *hv = dev->data->dev_private; struct hn_tx_queue *txq; + char name[RTE_MEMPOOL_NAMESIZE]; uint32_t tx_free_thresh; + int err = -ENOMEM; PMD_INIT_FUNC_TRACE(); + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = RTE_MIN(nb_desc / 4, + DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh + 3 >= nb_desc) { + PMD_INIT_LOG(ERR, + "tx_free_thresh must be less than the number of TX entries minus 3(%u)." 
+ " (tx_free_thresh=%u port=%u queue=%u)\n", + nb_desc - 3, + tx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE, socket_id); if (!txq) @@ -227,16 +272,34 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev, txq->chan = hv->channels[queue_idx]; txq->port_id = dev->data->port_id; txq->queue_id = queue_idx; + txq->free_thresh = tx_free_thresh; - tx_free_thresh = tx_conf->tx_free_thresh; - if (tx_free_thresh == 0) - tx_free_thresh = RTE_MIN(hv->chim_cnt / 4, - DEFAULT_TX_FREE_THRESH); + snprintf(name, sizeof(name), + "hn_txd_%u_%u", dev->data->port_id, queue_idx); - if (tx_free_thresh >= hv->chim_cnt - 3) - tx_free_thresh = hv->chim_cnt - 3; + PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu", + name, nb_desc, sizeof(struct hn_txdesc)); - txq->free_thresh = tx_free_thresh; + txq->tx_rndis_mz = rte_memzone_reserve_aligned(name, + nb_desc * HN_RNDIS_PKT_ALIGNED, rte_socket_id(), + RTE_MEMZONE_IOVA_CONTIG, HN_RNDIS_PKT_ALIGNED); + if (!txq->tx_rndis_mz) { + err = -rte_errno; + goto error; + } + txq->tx_rndis = txq->tx_rndis_mz->addr; + txq->tx_rndis_iova = txq->tx_rndis_mz->iova; + + txq->txdesc_pool = rte_mempool_create(name, nb_desc, + sizeof(struct hn_txdesc), + 0, 0, NULL, NULL, + hn_txd_init, txq, + dev->device->numa_node, 0); + if (txq->txdesc_pool == NULL) { + PMD_DRV_LOG(ERR, + "mempool %s create failed: %d", name, rte_errno); + goto error; + } txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size); txq->agg_pktmax = hv->rndis_agg_pkts; @@ -244,45 +307,99 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev, hn_reset_txagg(txq); - dev->data->tx_queues[queue_idx] = txq; + err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc, + socket_id, tx_conf); + if (err == 0) { + dev->data->tx_queues[queue_idx] = txq; + return 0; + } - return 0; +error: + if (txq->txdesc_pool) + rte_mempool_free(txq->txdesc_pool); + rte_memzone_free(txq->tx_rndis_mz); + rte_free(txq); + return err; +} + +void +hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct hn_tx_queue *txq = dev->data->tx_queues[queue_id]; + + qinfo->nb_desc = txq->txdesc_pool->size; + qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; +} + +static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq) +{ + struct hn_txdesc *txd; + + if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) { + ++txq->stats.ring_full; + PMD_TX_LOG(DEBUG, "tx pool exhausted!"); + return NULL; + } + + txd->m = NULL; + txd->packets = 0; + txd->data_size = 0; + txd->chim_size = 0; + + return txd; +} + +static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd) +{ + rte_mempool_put(txq->txdesc_pool, txd); } void hn_dev_tx_queue_release(void *arg) { struct hn_tx_queue *txq = arg; - struct hn_txdesc *txd; PMD_INIT_FUNC_TRACE(); if (!txq) return; - /* If any pending data is still present just drop it */ - txd = txq->agg_txd; - if (txd) - rte_mempool_put(txq->hv->tx_pool, txd); + if (txq->txdesc_pool) + rte_mempool_free(txq->txdesc_pool); + rte_memzone_free(txq->tx_rndis_mz); rte_free(txq); } -void -hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx, - struct rte_eth_txq_info *qinfo) +/* + * Check the status of a Tx descriptor in the queue. + * + * returns: + * - -EINVAL - offset outside of tx_descriptor pool. + * - RTE_ETH_TX_DESC_FULL - descriptor is not acknowledged by host. + * - RTE_ETH_TX_DESC_DONE - descriptor is available. 
+ */ +int hn_dev_tx_descriptor_status(void *arg, uint16_t offset) { - struct hn_data *hv = dev->data->dev_private; - struct hn_tx_queue *txq = dev->data->rx_queues[queue_idx]; + const struct hn_tx_queue *txq = arg; - qinfo->conf.tx_free_thresh = txq->free_thresh; - qinfo->nb_desc = hv->tx_pool->size; + hn_process_events(txq->hv, txq->queue_id, 0); + + if (offset >= rte_mempool_avail_count(txq->txdesc_pool)) + return -EINVAL; + + if (offset < rte_mempool_in_use_count(txq->txdesc_pool)) + return RTE_ETH_TX_DESC_FULL; + else + return RTE_ETH_TX_DESC_DONE; } static void hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, unsigned long xactid, const struct hn_nvs_rndis_ack *ack) { + struct hn_data *hv = dev->data->dev_private; struct hn_txdesc *txd = (struct hn_txdesc *)xactid; struct hn_tx_queue *txq; @@ -298,14 +415,18 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, txq->stats.bytes += txd->data_size; txq->stats.packets += txd->packets; } else { - PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u", - txq->port_id, txq->queue_id, txd->chim_index, ack->status); + PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u", + txq->port_id, txq->queue_id, txd->chim_index, ack->status); ++txq->stats.errors; } - rte_pktmbuf_free(txd->m); + if (txd->chim_index != NVS_CHIM_IDX_INVALID) { + hn_chim_free(hv, txd->chim_index); + txd->chim_index = NVS_CHIM_IDX_INVALID; + } - rte_mempool_put(txq->hv->tx_pool, txd); + rte_pktmbuf_free(txd->m); + hn_txd_put(txq, txd); } /* Handle transmit completion events */ @@ -322,8 +443,7 @@ hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id, break; default: - PMD_TX_LOG(NOTICE, - "unexpected send completion type %u", + PMD_DRV_LOG(NOTICE, "unexpected send completion type %u", hdr->type); } } @@ -404,35 +524,24 @@ next: return 0; } -/* - * Ack the consumed RXBUF associated w/ this channel packet, - * so that this RXBUF can be recycled by the hypervisor. - */ -static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb) -{ - struct rte_mbuf_ext_shared_info *shinfo = &rxb->shinfo; - struct hn_data *hv = rxb->hv; - - if (rte_mbuf_ext_refcnt_update(shinfo, -1) == 0) { - hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid); - --hv->rxbuf_outstanding; - } -} - static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque) { - hn_rx_buf_release(opaque); + struct hn_rx_bufinfo *rxb = opaque; + struct hn_rx_queue *rxq = rxb->rxq; + + rte_atomic32_dec(&rxq->rxbuf_outstanding); + hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid); } -static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq, +static struct hn_rx_bufinfo *hn_rx_buf_init(struct hn_rx_queue *rxq, const struct vmbus_chanpkt_rxbuf *pkt) { struct hn_rx_bufinfo *rxb; - rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid; + rxb = rxq->rxbuf_info + pkt->hdr.xactid; rxb->chan = rxq->chan; rxb->xactid = pkt->hdr.xactid; - rxb->hv = rxq->hv; + rxb->rxq = rxq; rxb->shinfo.free_cb = hn_rx_buf_free_cb; rxb->shinfo.fcb_opaque = rxb; @@ -446,6 +555,7 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, { struct hn_data *hv = rxq->hv; struct rte_mbuf *m; + bool use_extbuf = false; m = rte_pktmbuf_alloc(rxq->mb_pool); if (unlikely(!m)) { @@ -460,8 +570,9 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, * For large packets, avoid copy if possible but need to keep * some space available in receive area for later packets. 
*/ - if (dlen >= HN_RXCOPY_THRESHOLD && - hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) { + if (hv->rx_extmbuf_enable && dlen > hv->rx_copybreak && + (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) < + hv->rxbuf_section_cnt / 2) { struct rte_mbuf_ext_shared_info *shinfo; const void *rxbuf; rte_iova_t iova; @@ -475,12 +586,14 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf); shinfo = &rxb->shinfo; - if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1) - ++hv->rxbuf_outstanding; + /* shinfo is already set to 1 by the caller */ + if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2) + rte_atomic32_inc(&rxq->rxbuf_outstanding); rte_pktmbuf_attach_extbuf(m, data, iova, dlen + headroom, shinfo); m->data_off = headroom; + use_extbuf = true; } else { /* Mbuf's in pool must be large enough to hold small packets */ if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) { @@ -495,10 +608,24 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, m->port = rxq->port_id; m->pkt_len = dlen; m->data_len = dlen; + m->packet_type = rte_net_get_ptype(m, NULL, + RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { m->vlan_tci = info->vlan_info; m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN; + + /* NDIS always strips tag, put it back if necessary */ + if (!hv->vlan_strip && rte_vlan_insert(&m)) { + PMD_DRV_LOG(DEBUG, "vlan insert failed"); + ++rxq->stats.errors; + if (use_extbuf) + rte_pktmbuf_detach_extbuf(m); + rte_pktmbuf_free(m); + return; + } } if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { @@ -508,6 +635,9 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED + | NDIS_RXCSUM_INFO_UDPCS_FAILED)) + m->ol_flags |= PKT_RX_L4_CKSUM_BAD; } if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { @@ -515,16 +645,20 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, m->hash.rss = info->hash_value; } - PMD_RX_LOG(DEBUG, "port %u:%u RX id %" PRIu64 " size %u ol_flags %#" PRIx64, + PMD_RX_LOG(DEBUG, + "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64, rxq->port_id, rxq->queue_id, rxb->xactid, - m->pkt_len, m->ol_flags); + m->pkt_len, m->packet_type, m->ol_flags); ++rxq->stats.packets; rxq->stats.bytes += m->pkt_len; hn_update_packet_stats(&rxq->stats, m); if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) { - ++rxq->ring_full; + ++rxq->stats.ring_full; + PMD_RX_LOG(DEBUG, "rx ring full"); + if (use_extbuf) + rte_pktmbuf_detach_extbuf(m); rte_pktmbuf_free(m); } } @@ -533,7 +667,8 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, void *data, uint32_t dlen) { - unsigned int data_off, data_len, pktinfo_off, pktinfo_len; + unsigned int data_off, data_len; + unsigned int pktinfo_off, pktinfo_len; const struct rndis_packet_msg *pkt = data; struct hn_rxinfo info = { .vlan_info = HN_NDIS_VLAN_INFO_INVALID, @@ -578,10 +713,11 @@ static void hn_rndis_rx_data(struct hn_rx_queue *rxq, goto error; } - if (unlikely(data_off + data_len > pkt->len)) + /* overflow check */ + if (data_len > data_len + data_off || data_len + data_off > pkt->len) goto error; - if (unlikely(data_len < ETHER_HDR_LEN)) + if (unlikely(data_len < RTE_ETHER_HDR_LEN)) goto error; hn_rxpkt(rxq, rxb, data, data_off, data_len, 
&info); @@ -591,7 +727,7 @@ error: } static void -hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq, +hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, void *buf, uint32_t len) { const struct rndis_msghdr *hdr = buf; @@ -603,7 +739,7 @@ hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq, break; case RNDIS_INDICATE_STATUS_MSG: - hn_rndis_link_status(rxq->hv, buf); + hn_rndis_link_status(dev, buf); break; case RNDIS_INITIALIZE_CMPLT: @@ -700,7 +836,37 @@ hn_nvs_handle_rxbuf(struct rte_eth_dev *dev, } /* Send ACK now if external mbuf not used */ - hn_rx_buf_release(rxb); + if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0) + hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid); +} + +/* + * Called when NVS inband events are received. + * Send up a two part message with port_id and the NVS message + * to the pipe to the netvsc-vf-event control thread. + */ +static void hn_nvs_handle_notify(struct rte_eth_dev *dev, + const struct vmbus_chanpkt_hdr *pkt, + const void *data) +{ + const struct hn_nvs_hdr *hdr = data; + + switch (hdr->type) { + case NVS_TYPE_TXTBL_NOTE: + /* Transmit indirection table has locking problems + * in DPDK and therefore not implemented + */ + PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table"); + break; + + case NVS_TYPE_VFASSOC_NOTE: + hn_nvs_handle_vfassoc(dev, pkt, data); + break; + + default: + PMD_DRV_LOG(INFO, + "got notify, nvs type %u", hdr->type); + } } struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, @@ -709,30 +875,67 @@ struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, { struct hn_rx_queue *rxq; - rxq = rte_zmalloc_socket("HN_RXQ", - sizeof(*rxq) + HN_RXQ_EVENT_DEFAULT, + rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq), RTE_CACHE_LINE_SIZE, socket_id); - if (rxq) { - rxq->hv = hv; - rxq->chan = hv->channels[queue_id]; - rte_spinlock_init(&rxq->ring_lock); - rxq->port_id = hv->port_id; - rxq->queue_id = queue_id; + if (!rxq) + return NULL; + + rxq->hv = hv; + rxq->chan = hv->channels[queue_id]; + rte_spinlock_init(&rxq->ring_lock); + rxq->port_id = hv->port_id; + rxq->queue_id = queue_id; + rxq->event_sz = HN_RXQ_EVENT_DEFAULT; + rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT, + RTE_CACHE_LINE_SIZE, socket_id); + if (!rxq->event_buf) { + rte_free(rxq); + return NULL; + } + + /* setup rxbuf_info for non-primary queue */ + if (queue_id) { + rxq->rxbuf_info = rte_calloc("HN_RXBUF_INFO", + hv->rxbuf_section_cnt, + sizeof(*rxq->rxbuf_info), + RTE_CACHE_LINE_SIZE); + + if (!rxq->rxbuf_info) { + PMD_DRV_LOG(ERR, + "Could not allocate rxbuf info for queue %d\n", + queue_id); + rte_free(rxq->event_buf); + rte_free(rxq); + return NULL; + } } + return rxq; } +void +hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id]; + + qinfo->mp = rxq->mb_pool; + qinfo->nb_desc = rxq->rx_ring->size; + qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; +} + int hn_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf __rte_unused, + const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { struct hn_data *hv = dev->data->dev_private; char ring_name[RTE_RING_NAMESIZE]; struct hn_rx_queue *rxq; unsigned int count; + int error = -ENOMEM; PMD_INIT_FUNC_TRACE(); @@ -762,22 +965,25 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev, if (!rxq->rx_ring) goto 
fail;
 
+	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
+				     socket_id, rx_conf, mp);
+	if (error)
+		goto fail;
+
 	dev->data->rx_queues[queue_idx] = rxq;
 	return 0;
 
 fail:
 	rte_ring_free(rxq->rx_ring);
+	rte_free(rxq->rxbuf_info);
 	rte_free(rxq->event_buf);
 	rte_free(rxq);
-	return -ENOMEM;
+	return error;
 }
 
-void
-hn_dev_rx_queue_release(void *arg)
+static void
+hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
 {
-	struct hn_rx_queue *rxq = arg;
-
-	PMD_INIT_FUNC_TRACE();
 
 	if (!rxq)
 		return;
@@ -786,77 +992,121 @@
 	rxq->rx_ring = NULL;
 	rxq->mb_pool = NULL;
 
-	if (rxq != rxq->hv->primary) {
-		rte_free(rxq->event_buf);
-		rte_free(rxq);
-	}
+	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
+
+	/* Keep primary queue to allow for control operations */
+	if (keep_primary && rxq == rxq->hv->primary)
+		return;
+
+	rte_free(rxq->rxbuf_info);
+	rte_free(rxq->event_buf);
+	rte_free(rxq);
 }
 
 void
-hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
-		     struct rte_eth_rxq_info *qinfo)
+hn_dev_rx_queue_release(void *arg)
 {
-	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_idx];
+	struct hn_rx_queue *rxq = arg;
 
-	qinfo->mp = rxq->mb_pool;
-	qinfo->scattered_rx = 1;
-	qinfo->nb_desc = rte_ring_get_capacity(rxq->rx_ring);
+	PMD_INIT_FUNC_TRACE();
+
+	hn_rx_queue_free(rxq, true);
 }
 
-static void
-hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
-		     const void *data)
+/*
+ * Get the number of used descriptors in an Rx queue.
+ * For this device that means how many packets are pending in the ring.
+ */
+uint32_t
+hn_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-	const struct hn_nvs_hdr *hdr = data;
+	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];
 
-	if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
-		PMD_DRV_LOG(ERR, "invalid nvs notify");
-		return;
-	}
+	return rte_ring_count(rxq->rx_ring);
+}
+
+/*
+ * Check the status of an Rx descriptor in the queue
+ *
+ * returns:
+ *  - -EINVAL               - offset outside of ring
+ *  - RTE_ETH_RX_DESC_AVAIL - no data available yet
+ *  - RTE_ETH_RX_DESC_DONE  - data is waiting in the staging ring
+ */
+int hn_dev_rx_queue_status(void *arg, uint16_t offset)
+{
+	const struct hn_rx_queue *rxq = arg;
 
-	PMD_DRV_LOG(INFO,
-		    "got notify, nvs type %u", hdr->type);
+	hn_process_events(rxq->hv, rxq->queue_id, 0);
+	if (offset >= rxq->rx_ring->capacity)
+		return -EINVAL;
+
+	if (offset < rte_ring_count(rxq->rx_ring))
+		return RTE_ETH_RX_DESC_DONE;
+	else
+		return RTE_ETH_RX_DESC_AVAIL;
+}
+
+int
+hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
+{
+	struct hn_tx_queue *txq = arg;
+
+	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
 }
 
 /*
  * Process pending events on the channel.
  * Called from both Rx queue poll and Tx cleanup
  */
-void hn_process_events(struct hn_data *hv, uint16_t queue_id)
+uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+			   uint32_t tx_limit)
 {
 	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
 	struct hn_rx_queue *rxq;
 	uint32_t bytes_read = 0;
+	uint32_t tx_done = 0;
 	int ret = 0;
 
 	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
 
-	/* If no pending data then nothing to do */
-	if (rte_vmbus_chan_rx_empty(rxq->chan))
-		return;
-
 	/*
 	 * Since channel is shared between Rx and TX queue need to have a lock
 	 * since DPDK does not force same CPU to be used for Rx/Tx.
 */
 	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
-		return;
+		return 0;
 
 	for (;;) {
 		const struct vmbus_chanpkt_hdr *pkt;
-		uint32_t len = HN_RXQ_EVENT_DEFAULT;
+		uint32_t len = rxq->event_sz;
 		const void *data;
 
+retry:
 		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
 		if (ret == -EAGAIN)
 			break;	/* ring is empty */
 
-		else if (ret == -ENOBUFS)
-			rte_exit(EXIT_FAILURE, "event buffer not big enough (%u < %u)",
-				 HN_RXQ_EVENT_DEFAULT, len);
-		else if (ret <= 0)
+		if (unlikely(ret == -ENOBUFS)) {
+			/* event buffer not large enough to read ring */
+
+			PMD_DRV_LOG(DEBUG,
+				    "event buffer expansion (need %u)", len);
+			rxq->event_sz = len + len / 4;
+			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
+						     RTE_CACHE_LINE_SIZE);
+			if (rxq->event_buf)
+				goto retry;
+			/* out of memory, no more events now */
+			rxq->event_sz = 0;
+			break;
+		}
+
+		if (unlikely(ret <= 0)) {
+			/* This indicates a failure to communicate (or worse) */
 			rte_exit(EXIT_FAILURE,
 				 "vmbus ring buffer error: %d", ret);
+		}
 
 		bytes_read += ret;
 		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
@@ -864,6 +1114,7 @@
 
 		switch (pkt->type) {
 		case VMBUS_CHANPKT_TYPE_COMP:
+			++tx_done;
 			hn_nvs_handle_comp(dev, queue_id, pkt, data);
 			break;
 
@@ -872,7 +1123,7 @@
 			break;
 
 		case VMBUS_CHANPKT_TYPE_INBAND:
-			hn_nvs_handle_notify(pkt, data);
+			hn_nvs_handle_notify(dev, pkt, data);
 			break;
 
 		default:
@@ -880,7 +1131,7 @@
 			break;
 		}
 
-		if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
+		if (tx_limit && tx_done >= tx_limit)
 			break;
 	}
 
@@ -888,6 +1139,8 @@
 		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);
 
 	rte_spinlock_unlock(&rxq->ring_lock);
+
+	return tx_done;
 }
 
 static void hn_append_to_chim(struct hn_tx_queue *txq,
@@ -945,35 +1198,28 @@
 
 	if (likely(ret == 0))
 		hn_reset_txagg(txq);
-	else
-		PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d",
-			   txq->port_id, txq->queue_id, ret);
-
-	return ret;
-}
-
-static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
-				    struct hn_tx_queue *txq)
-{
-	struct hn_txdesc *txd;
+	else if (ret == -EAGAIN) {
+		PMD_TX_LOG(DEBUG, "port %u:%u channel full",
+			   txq->port_id, txq->queue_id);
+		++txq->stats.channel_full;
+	} else {
+		++txq->stats.errors;
 
-	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
-		++txq->stats.nomemory;
-		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
-		return NULL;
+		PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d",
+			    txq->port_id, txq->queue_id, ret);
 	}
-
-	txd->m = NULL;
-	txd->queue_id = txq->queue_id;
-	txd->packets = 0;
-	txd->data_size = 0;
-	txd->chim_size = 0;
-
-	return txd;
+	return ret;
 }
 
+/*
+ * Try to find a place in a send chimney buffer to put
+ * the small packet. If space is available, this routine
+ * returns a pointer to where the data should be placed.
+ * If there is no space, the caller should try direct transmit.
+ */ static void * -hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) +hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, + struct hn_txdesc *txd, uint32_t pktsize) { struct hn_txdesc *agg_txd = txq->agg_txd; struct rndis_packet_msg *pkt; @@ -1001,7 +1247,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) } chim = (uint8_t *)pkt + pkt->len; - + txq->agg_prevpkt = chim; txq->agg_pktleft--; txq->agg_szleft -= pktsize; if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) { @@ -1011,18 +1257,21 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) */ txq->agg_pktleft = 0; } - } else { - agg_txd = hn_new_txd(hv, txq); - if (!agg_txd) - return NULL; - - chim = (uint8_t *)hv->chim_res->addr - + agg_txd->chim_index * hv->chim_szmax; - txq->agg_txd = agg_txd; - txq->agg_pktleft = txq->agg_pktmax - 1; - txq->agg_szleft = txq->agg_szmax - pktsize; + hn_txd_put(txq, txd); + return chim; } + + txd->chim_index = hn_chim_alloc(hv); + if (txd->chim_index == NVS_CHIM_IDX_INVALID) + return NULL; + + chim = (uint8_t *)hv->chim_res->addr + + txd->chim_index * hv->chim_szmax; + + txq->agg_txd = txd; + txq->agg_pktleft = txq->agg_pktmax - 1; + txq->agg_szleft = txq->agg_szmax - pktsize; txq->agg_prevpkt = chim; return chim; @@ -1198,11 +1447,8 @@ static int hn_xmit_sg(struct hn_tx_queue *txq, hn_rndis_dump(txd->rndis_pkt); /* pass IOVA of rndis header in first segment */ - addr = rte_malloc_virt2iova(txd->rndis_pkt); - if (unlikely(addr == RTE_BAD_IOVA)) { - PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova"); - return -EINVAL; - } + addr = txq->tx_rndis_iova + + ((char *)txd->rndis_pkt - (char *)txq->tx_rndis); sg[0].page = addr / PAGE_SIZE; sg[0].ofs = addr & PAGE_MASK; @@ -1226,24 +1472,50 @@ uint16_t hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct hn_tx_queue *txq = ptxq; + uint16_t queue_id = txq->queue_id; struct hn_data *hv = txq->hv; + struct rte_eth_dev *vf_dev; bool need_sig = false; - uint16_t nb_tx; + uint16_t nb_tx, tx_thresh; int ret; if (unlikely(hv->closed)) return 0; - if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh) - hn_process_events(hv, txq->queue_id); + /* + * Always check for events on the primary channel + * because that is where hotplug notifications occur. 
+ */ + tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts); + if (txq->queue_id == 0 || + rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh) + hn_process_events(hv, txq->queue_id, 0); + + /* Transmit over VF if present and up */ + rte_rwlock_read_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->data->dev_started) { + void *sub_q = vf_dev->data->tx_queues[queue_id]; + + nb_tx = (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts); + rte_rwlock_read_unlock(&hv->vf_lock); + return nb_tx; + } + rte_rwlock_read_unlock(&hv->vf_lock); for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { struct rte_mbuf *m = tx_pkts[nb_tx]; uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN; struct rndis_packet_msg *pkt; + struct hn_txdesc *txd; + + txd = hn_txd_get(txq); + if (txd == NULL) + break; /* For small packets aggregate them in chimney buffer */ - if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) { + if (m->pkt_len <= hv->tx_copybreak && + pkt_size <= txq->agg_szmax) { /* If this packet will not fit, then flush */ if (txq->agg_pktleft == 0 || RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) { @@ -1251,11 +1523,12 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) goto fail; } - pkt = hn_try_txagg(hv, txq, pkt_size); + + pkt = hn_try_txagg(hv, txq, txd, pkt_size); if (unlikely(!pkt)) break; - hn_encap(pkt, txq->queue_id, m); + hn_encap(pkt, queue_id, m); hn_append_to_chim(txq, pkt, m); rte_pktmbuf_free(m); @@ -1265,30 +1538,27 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) hn_flush_txagg(txq, &need_sig)) goto fail; } else { - struct hn_txdesc *txd; - - /* can send chimney data and large packet at once */ - txd = txq->agg_txd; - if (txd) { - hn_reset_txagg(txq); - } else { - txd = hn_new_txd(hv, txq); - if (unlikely(!txd)) - break; - } + /* Send any outstanding packets in buffer */ + if (txq->agg_txd && hn_flush_txagg(txq, &need_sig)) + goto fail; pkt = txd->rndis_pkt; txd->m = m; - txd->data_size += m->pkt_len; + txd->data_size = m->pkt_len; ++txd->packets; - hn_encap(pkt, txq->queue_id, m); + hn_encap(pkt, queue_id, m); ret = hn_xmit_sg(txq, txd, m, &need_sig); if (unlikely(ret != 0)) { - PMD_TX_LOG(NOTICE, "sg send failed: %d", ret); - ++txq->stats.errors; - rte_mempool_put(hv->tx_pool, txd); + if (ret == -EAGAIN) { + PMD_TX_LOG(DEBUG, "sg channel full"); + ++txq->stats.channel_full; + } else { + PMD_DRV_LOG(NOTICE, "sg send failed: %d", ret); + ++txq->stats.errors; + } + hn_txd_put(txq, txd); goto fail; } } @@ -1306,20 +1576,70 @@ fail: return nb_tx; } +static uint16_t +hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq, + struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + uint16_t i, n; + + if (unlikely(nb_pkts == 0)) + return 0; + + n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts); + + /* relabel the received mbufs */ + for (i = 0; i < n; i++) + rx_pkts[i]->port = rxq->port_id; + + return n; +} + uint16_t hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { struct hn_rx_queue *rxq = prxq; struct hn_data *hv = rxq->hv; + struct rte_eth_dev *vf_dev; + uint16_t nb_rcv; if (unlikely(hv->closed)) return 0; - /* If ring is empty then process more */ - if (rte_ring_count(rxq->rx_ring) < nb_pkts) - hn_process_events(hv, rxq->queue_id); + /* Check for new completions (and hotplug) */ + if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts)) + hn_process_events(hv, rxq->queue_id, 0); + + /* Always check the vmbus path for multicast and new flows */ + nb_rcv = 
rte_ring_sc_dequeue_burst(rxq->rx_ring, + (void **)rx_pkts, nb_pkts, NULL); + + /* If VF is available, check that as well */ + rte_rwlock_read_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->data->dev_started) + nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq, + rx_pkts + nb_rcv, nb_pkts - nb_rcv); + + rte_rwlock_read_unlock(&hv->vf_lock); + return nb_rcv; +} - /* Get mbufs off staging ring */ - return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts, - nb_pkts, NULL); +void +hn_dev_free_queues(struct rte_eth_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + hn_rx_queue_free(rxq, false); + dev->data->rx_queues[i] = NULL; + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + hn_dev_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } + dev->data->nb_tx_queues = 0; }
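
The chimney ("copy") buffer handling above replaces per-txd mempool slot ownership with a spinlock-protected rte_bitmap of free indexes (hn_chim_init/hn_chim_alloc/hn_chim_free). Below is a minimal standalone sketch of the same free-index allocator pattern; all names here (slot_pool, slot_alloc, slot_free, SLOT_IDX_INVALID) are illustrative stand-ins, not driver API, and an initialized EAL is assumed.

#include <stdint.h>
#include <rte_bitmap.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_spinlock.h>

#define SLOT_IDX_INVALID UINT32_MAX	/* stand-in for NVS_CHIM_IDX_INVALID */

struct slot_pool {
	rte_spinlock_t lock;
	uint32_t cnt;
	struct rte_bitmap *bmap;
	void *bmem;
};

static int slot_pool_init(struct slot_pool *p, uint32_t cnt)
{
	uint32_t sz = rte_bitmap_get_memory_footprint(cnt);
	uint32_t i;

	rte_spinlock_init(&p->lock);
	p->cnt = cnt;
	p->bmem = rte_zmalloc("slot_bitmap", sz, RTE_CACHE_LINE_SIZE);
	if (p->bmem == NULL)
		return -1;

	p->bmap = rte_bitmap_init(cnt, p->bmem, sz);
	if (p->bmap == NULL) {
		rte_free(p->bmem);
		return -1;
	}

	/* a set bit means "this index is free" */
	for (i = 0; i < cnt; i++)
		rte_bitmap_set(p->bmap, i);
	return 0;
}

static uint32_t slot_alloc(struct slot_pool *p)
{
	uint32_t index = SLOT_IDX_INVALID;
	uint64_t slab = 0;

	rte_spinlock_lock(&p->lock);
	/* scan returns the start of a 64-bit slab; bsf locates the first
	 * set (free) bit inside it, exactly as hn_chim_alloc() does */
	if (rte_bitmap_scan(p->bmap, &index, &slab)) {
		index += rte_bsf64(slab);
		rte_bitmap_clear(p->bmap, index);
	}
	rte_spinlock_unlock(&p->lock);
	return index;
}

static void slot_free(struct slot_pool *p, uint32_t index)
{
	if (index >= p->cnt)
		return;		/* caller bug; the driver logs this case */
	rte_spinlock_lock(&p->lock);
	rte_bitmap_set(p->bmap, index);
	rte_spinlock_unlock(&p->lock);
}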
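The new hn_dev_rx_queue_count/hn_dev_rx_queue_status, hn_dev_tx_descriptor_status and hn_dev_tx_done_cleanup callbacks are reached through the generic ethdev API. A hedged application-side sketch follows; port_id and queue_id are placeholders, and the port is assumed configured and started.

#include <stdio.h>
#include <rte_ethdev.h>

static void queue_progress(uint16_t port_id, uint16_t queue_id)
{
	int n, st;

	/* Reclaim up to 64 completed Tx descriptors; for netvsc this
	 * lands in hn_dev_tx_done_cleanup() -> hn_process_events() */
	n = rte_eth_tx_done_cleanup(port_id, queue_id, 64);
	if (n >= 0)
		printf("%d tx completions processed\n", n);

	/* Packets already staged in the Rx ring (hn_dev_rx_queue_count) */
	n = rte_eth_rx_queue_count(port_id, queue_id);
	if (n >= 0)
		printf("%d packets pending\n", n);

	/* Probe the head of each queue (hn_dev_rx_queue_status and
	 * hn_dev_tx_descriptor_status above) */
	st = rte_eth_rx_descriptor_status(port_id, queue_id, 0);
	if (st == RTE_ETH_RX_DESC_DONE)
		printf("data is waiting in the staging ring\n");

	st = rte_eth_tx_descriptor_status(port_id, queue_id, 0);
	if (st == RTE_ETH_TX_DESC_FULL)
		printf("descriptor not yet acknowledged by the host\n");
}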
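hn_process_events() now grows the per-queue event buffer when rte_vmbus_chan_recv_raw() returns -ENOBUFS, instead of aborting the process. The grow-and-retry pattern in isolation: read_raw_event() and its parameters are hypothetical, and per the driver code above, recv_raw is assumed to report the required size back through its len argument.

#include <errno.h>
#include <stdint.h>
#include <rte_bus_vmbus.h>
#include <rte_common.h>
#include <rte_malloc.h>

/* Returns bytes read, 0 if the ring is empty, or a negative errno. */
static int read_raw_event(struct vmbus_channel *chan,
			  uint8_t **buf, uint32_t *cap)
{
	uint32_t len;
	void *nbuf;
	int ret;

	for (;;) {
		len = *cap;
		ret = rte_vmbus_chan_recv_raw(chan, *buf, &len);
		if (ret == -EAGAIN)
			return 0;	/* ring is empty */
		if (ret != -ENOBUFS)
			return ret;

		/* ring entry larger than the buffer: recv_raw reported the
		 * required size in len, so grow by 25% and retry; using a
		 * temporary keeps the old buffer (no leak) if rte_realloc
		 * fails */
		nbuf = rte_realloc(*buf, len + len / 4, RTE_CACHE_LINE_SIZE);
		if (nbuf == NULL)
			return -ENOMEM;
		*buf = nbuf;
		*cap = len + len / 4;
	}
}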
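On the Rx path, large packets are attached zero-copy to mbufs out of the shared receive area, and the external-buffer shared_info reference count decides when the buffer may be acked back to the host (hn_rx_buf_free_cb/hn_nvs_handle_rxbuf). A simplified sketch of that refcounting contract; rx_binfo, rx_area_ack and the helper names are illustrative stand-ins, not driver API.

#include <rte_common.h>
#include <rte_mbuf.h>

struct rx_binfo {
	void *area;		/* opaque handle for the shared Rx area */
	struct rte_mbuf_ext_shared_info shinfo;
};

/* Stand-in for hn_nvs_ack_rxbuf(): tell the host it may recycle the area */
static void rx_area_ack(void *area)
{
	RTE_SET_USED(area);
}

/* Invoked by mbuf free when the last external reference is dropped */
static void rxbuf_free_cb(void *addr __rte_unused, void *opaque)
{
	struct rx_binfo *rxb = opaque;

	rx_area_ack(rxb->area);
}

/* Start of a receive transaction: the transaction itself holds one ref,
 * matching the "shinfo is already set to 1 by the caller" comment above */
static void rxb_init(struct rx_binfo *rxb, void *area)
{
	rxb->area = area;
	rxb->shinfo.free_cb = rxbuf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
}

/* Attach one packet's slice of the area to an mbuf, zero-copy */
static void rxb_attach(struct rx_binfo *rxb, struct rte_mbuf *m,
		       void *data, rte_iova_t iova, uint16_t len)
{
	rte_mbuf_ext_refcnt_update(&rxb->shinfo, 1);
	rte_pktmbuf_attach_extbuf(m, data, iova, len, &rxb->shinfo);
	m->data_len = len;
	m->pkt_len = len;
}

/* End of the transaction: drop its own reference; if no mbuf still points
 * into the area, ack immediately, as hn_nvs_handle_rxbuf() does */
static void rxb_done(struct rx_binfo *rxb)
{
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		rx_area_ack(rxb->area);
}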