X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Favp%2Favp_ethdev.c;h=5b47f0924af7cd875ea2447c748aece3d352c9f8;hb=8cc50a63800278ba53c67437c9d19649e17e8c6c;hp=8945a45b577ba7079d44d4109ed057f1f795b8c5;hpb=cce4367fb4c862ece1eca544e5389ced47690d9f;p=dpdk.git

diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c
index 8945a45b57..5b47f0924a 100644
--- a/drivers/net/avp/avp_ethdev.c
+++ b/drivers/net/avp/avp_ethdev.c
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright (c) 2013-2017, Wind River Systems, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1) Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2) Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3) Neither the name of Wind River Systems nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2013-2017 Wind River Systems, Inc.
*/ #include @@ -36,17 +8,19 @@ #include #include -#include +#include +#include #include #include -#include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -58,14 +32,20 @@ #include "avp_logs.h" - +static int avp_dev_create(struct rte_pci_device *pci_dev, + struct rte_eth_dev *eth_dev); static int avp_dev_configure(struct rte_eth_dev *dev); -static void avp_dev_info_get(struct rte_eth_dev *dev, - struct rte_eth_dev_info *dev_info); -static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask); -static int avp_dev_link_update(struct rte_eth_dev *dev, - __rte_unused int wait_to_complete); +static int avp_dev_start(struct rte_eth_dev *dev); +static void avp_dev_stop(struct rte_eth_dev *dev); +static void avp_dev_close(struct rte_eth_dev *dev); +static int avp_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask); +static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete); +static int avp_dev_promiscuous_enable(struct rte_eth_dev *dev); +static int avp_dev_promiscuous_disable(struct rte_eth_dev *dev); + static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t nb_rx_desc, @@ -79,13 +59,34 @@ static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); +static uint16_t avp_recv_scattered_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +static uint16_t avp_recv_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +static uint16_t avp_xmit_scattered_pkts(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +static uint16_t avp_xmit_pkts(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + static void avp_dev_rx_queue_release(void *rxq); static void avp_dev_tx_queue_release(void *txq); -#define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) + +static int avp_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats); +static int avp_dev_stats_reset(struct rte_eth_dev *dev); +#define AVP_MAX_RX_BURST 64 +#define AVP_MAX_TX_BURST 64 #define AVP_MAX_MAC_ADDRS 1 -#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN +#define AVP_MIN_RX_BUFSIZE RTE_ETHER_MIN_LEN /* @@ -123,9 +124,16 @@ static const struct rte_pci_id pci_id_avp_map[] = { */ static const struct eth_dev_ops avp_eth_dev_ops = { .dev_configure = avp_dev_configure, + .dev_start = avp_dev_start, + .dev_stop = avp_dev_stop, + .dev_close = avp_dev_close, .dev_infos_get = avp_dev_info_get, .vlan_offload_set = avp_vlan_offload_set, + .stats_get = avp_dev_stats_get, + .stats_reset = avp_dev_stats_reset, .link_update = avp_dev_link_update, + .promiscuous_enable = avp_dev_promiscuous_enable, + .promiscuous_disable = avp_dev_promiscuous_disable, .rx_queue_setup = avp_dev_rx_queue_setup, .rx_queue_release = avp_dev_rx_queue_release, .tx_queue_setup = avp_dev_tx_queue_setup, @@ -136,6 +144,7 @@ static const struct eth_dev_ops avp_eth_dev_ops = { #define AVP_F_PROMISC (1 << 1) #define AVP_F_CONFIGURED (1 << 2) #define AVP_F_LINKUP (1 << 3) +#define AVP_F_DETACHED (1 << 4) /**@} */ /* Ethernet device validation marker */ @@ -148,11 +157,11 @@ static const struct eth_dev_ops avp_eth_dev_ops = { struct avp_dev { uint32_t magic; /**< Memory validation marker */ uint64_t device_id; /**< Unique system identifier */ - struct ether_addr ethaddr; /**< Host specified MAC address */ + struct rte_ether_addr 
ethaddr; /**< Host specified MAC address */ struct rte_eth_dev_data *dev_data; /**< Back pointer to ethernet device data */ volatile uint32_t flags; /**< Device operational flags */ - uint8_t port_id; /**< Ethernet port identifier */ + uint16_t port_id; /**< Ethernet port identifier */ struct rte_mempool *pool; /**< pkt mbuf mempool */ unsigned int guest_mbuf_size; /**< local pool mbuf size */ unsigned int host_mbuf_size; /**< host mbuf size */ @@ -171,6 +180,9 @@ struct avp_dev { struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES]; /**< To be freed mbufs queue */ + /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */ + rte_spinlock_t lock; + /* For request & response */ struct rte_avp_fifo *req_q; /**< Request queue */ struct rte_avp_fifo *resp_q; /**< Response queue */ @@ -283,6 +295,23 @@ done: return ret; } +static int +avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_request request; + int ret; + + /* setup a link state change request */ + memset(&request, 0, sizeof(request)); + request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF; + request.if_up = state; + + ret = avp_dev_process_request(avp, &request); + + return ret == 0 ? request.result : ret; +} + static int avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, struct rte_avp_device_config *config) @@ -301,12 +330,37 @@ avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, return ret == 0 ? request.result : ret; } +static int +avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_request request; + int ret; + + /* setup a shutdown request */ + memset(&request, 0, sizeof(request)); + request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE; + + ret = avp_dev_process_request(avp, &request); + + return ret == 0 ? 
request.result : ret; +} + +/* translate from host mbuf virtual address to guest virtual address */ +static inline void * +avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address) +{ + return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address, + (uintptr_t)avp->host_mbuf_addr), + (uintptr_t)avp->mbuf_addr); +} + /* translate from host physical address to guest virtual address */ static void * avp_dev_translate_address(struct rte_eth_dev *eth_dev, - phys_addr_t host_phys_addr) + rte_iova_t host_phys_addr) { - struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); struct rte_mem_resource *resource; struct rte_avp_memmap_info *info; struct rte_avp_memmap *map; @@ -327,7 +381,7 @@ avp_dev_translate_address(struct rte_eth_dev *eth_dev, (host_phys_addr < (map->phys_addr + map->length))) { /* address is within this segment */ offset += (host_phys_addr - map->phys_addr); - addr = RTE_PTR_ADD(addr, offset); + addr = RTE_PTR_ADD(addr, (uintptr_t)offset); PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n", host_phys_addr, addr); @@ -359,7 +413,7 @@ avp_dev_version_check(uint32_t version) static int avp_dev_check_regions(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); struct rte_avp_memmap_info *memmap; struct rte_avp_device_info *info; struct rte_mem_resource *resource; @@ -416,6 +470,46 @@ avp_dev_check_regions(struct rte_eth_dev *eth_dev) return 0; } +static int +avp_dev_detach(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n", + eth_dev->data->port_id, avp->device_id); + + rte_spinlock_lock(&avp->lock); + + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(NOTICE, "port %u already detached\n", + eth_dev->data->port_id); + ret = 0; + goto unlock; + } + + /* shutdown the device first so the host stops sending us packets. 
*/ + ret = avp_dev_ctrl_shutdown(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", + ret); + avp->flags &= ~AVP_F_DETACHED; + goto unlock; + } + + avp->flags |= AVP_F_DETACHED; + rte_wmb(); + + /* wait for queues to acknowledge the presence of the detach flag */ + rte_delay_ms(1); + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + static void _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) { @@ -455,7 +549,7 @@ _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) static void _avp_set_queue_counts(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct rte_avp_device_info *host_info; void *addr; @@ -484,6 +578,239 @@ _avp_set_queue_counts(struct rte_eth_dev *eth_dev) avp->num_tx_queues, avp->num_rx_queues); } +static int +avp_dev_attach(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_config config; + unsigned int i; + int ret; + + PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", + eth_dev->data->port_id, avp->device_id); + + rte_spinlock_lock(&avp->lock); + + if (!(avp->flags & AVP_F_DETACHED)) { + PMD_DRV_LOG(NOTICE, "port %u already attached\n", + eth_dev->data->port_id); + ret = 0; + goto unlock; + } + + /* + * make sure that the detached flag is set prior to reconfiguring the + * queues. + */ + avp->flags |= AVP_F_DETACHED; + rte_wmb(); + + /* + * re-run the device create utility which will parse the new host info + * and setup the AVP device queue pointers. + */ + ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", + ret); + goto unlock; + } + + if (avp->flags & AVP_F_CONFIGURED) { + /* + * Update the receive queue mapping to handle cases where the + * source and destination hosts have different queue + * requirements. As long as the DETACHED flag is asserted the + * queue table should not be referenced so it should be safe to + * update it. + */ + _avp_set_queue_counts(eth_dev); + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + _avp_set_rx_queue_mappings(eth_dev, i); + + /* + * Update the host with our config details so that it knows the + * device is active. 
+ */ + memset(&config, 0, sizeof(config)); + config.device_id = avp->device_id; + config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; + config.driver_version = AVP_DPDK_DRIVER_VERSION; + config.features = avp->features; + config.num_tx_queues = avp->num_tx_queues; + config.num_rx_queues = avp->num_rx_queues; + config.if_up = !!(avp->flags & AVP_F_LINKUP); + + ret = avp_dev_ctrl_set_config(eth_dev, &config); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", + ret); + goto unlock; + } + } + + rte_wmb(); + avp->flags &= ~AVP_F_DETACHED; + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static void +avp_dev_interrupt_handler(void *data) +{ + struct rte_eth_dev *eth_dev = data; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + uint32_t status, value; + int ret; + + if (registers == NULL) + rte_panic("no mapped MMIO register space\n"); + + /* read the interrupt status register + * note: this register clears on read so all raised interrupts must be + * handled or remembered for later processing + */ + status = AVP_READ32( + RTE_PTR_ADD(registers, + RTE_AVP_INTERRUPT_STATUS_OFFSET)); + + if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) { + /* handle interrupt based on current status */ + value = AVP_READ32( + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_STATUS_OFFSET)); + switch (value) { + case RTE_AVP_MIGRATION_DETACHED: + ret = avp_dev_detach(eth_dev); + break; + case RTE_AVP_MIGRATION_ATTACHED: + ret = avp_dev_attach(eth_dev); + break; + default: + PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", + value); + ret = -EINVAL; + } + + /* acknowledge the request by writing out our current status */ + value = (ret == 0 ? 
value : RTE_AVP_MIGRATION_ERROR); + AVP_WRITE32(value, + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_ACK_OFFSET)); + + PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); + } + + if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) + PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", + status); + + /* re-enable UIO interrupt handling */ + ret = rte_intr_ack(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", + ret); + /* continue */ + } +} + +static int +avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + int ret; + + if (registers == NULL) + return -EINVAL; + + /* enable UIO interrupt handling */ + ret = rte_intr_enable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", + ret); + return ret; + } + + /* inform the device that all interrupts are enabled */ + AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, + RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); + + return 0; +} + +static int +avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + int ret; + + if (registers == NULL) + return 0; + + /* inform the device that all interrupts are disabled */ + AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, + RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); + + /* enable UIO interrupt handling */ + ret = rte_intr_disable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", + ret); + return ret; + } + + return 0; +} + +static int +avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + int ret; + + /* register a callback handler with UIO for interrupt notifications */ + ret = rte_intr_callback_register(&pci_dev->intr_handle, + avp_dev_interrupt_handler, + (void *)eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", + ret); + return ret; + } + + /* enable interrupt processing */ + return avp_dev_enable_interrupts(eth_dev); +} + +static int +avp_dev_migration_pending(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + uint32_t value; + + if (registers == NULL) + return 0; + + value = AVP_READ32(RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_STATUS_OFFSET)); + if (value == RTE_AVP_MIGRATION_DETACHED) { + /* migration is in progress; ack it if we have not already */ + AVP_WRITE32(value, + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_ACK_OFFSET)); + return 1; + } + return 0; +} + /* * create a AVP device using the supplied device info by first translating it * to guest address space(s). 
@@ -536,8 +863,9 @@ avp_dev_create(struct rte_pci_device *pci_dev, avp->port_id = eth_dev->data->port_id; avp->host_mbuf_size = host_info->mbuf_size; avp->host_features = host_info->features; + rte_spinlock_init(&avp->lock); memcpy(&avp->ethaddr.addr_bytes[0], - host_info->ethaddr, ETHER_ADDR_LEN); + host_info->ethaddr, RTE_ETHER_ADDR_LEN); /* adjust max values to not exceed our max */ avp->max_tx_queues = RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES); @@ -625,8 +953,12 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) struct rte_pci_device *pci_dev; int ret; - pci_dev = AVP_DEV_TO_PCI(eth_dev); + pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); eth_dev->dev_ops = &avp_eth_dev_ops; + eth_dev->rx_pkt_burst = &avp_recv_pkts; + eth_dev->tx_pkt_burst = &avp_xmit_pkts; + /* Let rte_eth_dev_close() release the port resources */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; if (rte_eal_process_type() != RTE_PROC_PRIMARY) { /* @@ -635,12 +967,21 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) * be mapped to the same virtual address so all pointers should * be valid. */ + if (eth_dev->data->scattered_rx) { + PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); + eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; + eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; + } return 0; } rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; + /* Check current migration status */ + if (avp_dev_migration_pending(eth_dev)) { + PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); + return -EBUSY; + } /* Check BAR resources */ ret = avp_dev_check_regions(eth_dev); @@ -650,6 +991,13 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) return ret; } + /* Enable interrupts */ + ret = avp_dev_setup_interrupts(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); + return ret; + } + /* Handle each subtype */ ret = avp_dev_create(pci_dev, eth_dev); if (ret < 0) { @@ -658,15 +1006,16 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) } /* Allocate memory for storing MAC addresses */ - eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0); + eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", + RTE_ETHER_ADDR_LEN, 0); if (eth_dev->data->mac_addrs == NULL) { PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n", - ETHER_ADDR_LEN); + RTE_ETHER_ADDR_LEN); return -ENOMEM; } /* Get a mac from device config */ - ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); + rte_ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); return 0; } @@ -680,27 +1029,65 @@ eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) if (eth_dev->data == NULL) return 0; - if (eth_dev->data->mac_addrs != NULL) { - rte_free(eth_dev->data->mac_addrs); - eth_dev->data->mac_addrs = NULL; - } + avp_dev_close(eth_dev); return 0; } +static int +eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct avp_adapter), + eth_avp_dev_init); +} + +static int +eth_avp_pci_remove(struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_remove(pci_dev, + eth_avp_dev_uninit); +} -static struct eth_driver rte_avp_pmd = { - { - .id_table = pci_id_avp_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING, - .probe = rte_eth_dev_pci_probe, - .remove = rte_eth_dev_pci_remove, - }, - .eth_dev_init = eth_avp_dev_init, - .eth_dev_uninit = eth_avp_dev_uninit, - .dev_private_size = sizeof(struct avp_adapter), +static struct 
rte_pci_driver rte_avp_pmd = { + .id_table = pci_id_avp_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_avp_pci_probe, + .remove = eth_avp_pci_remove, }; +static int +avp_dev_enable_scattered(struct rte_eth_dev *eth_dev, + struct avp_dev *avp) +{ + unsigned int max_rx_pkt_len; + + max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; + + if ((max_rx_pkt_len > avp->guest_mbuf_size) || + (max_rx_pkt_len > avp->host_mbuf_size)) { + /* + * If the guest MTU is greater than either the host or guest + * buffers then chained mbufs have to be enabled in the TX + * direction. It is assumed that the application will not need + * to send packets larger than their max_rx_pkt_len (MRU). + */ + return 1; + } + + if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) || + (avp->max_rx_pkt_len > avp->host_mbuf_size)) { + /* + * If the host MRU is greater than its own mbuf size or the + * guest mbuf size then chained mbufs have to be enabled in the + * RX direction. + */ + return 1; + } + + return 0; +} + static int avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id, @@ -727,6 +1114,15 @@ avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size); avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM; + if (avp_dev_enable_scattered(eth_dev, avp)) { + if (!eth_dev->data->scattered_rx) { + PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); + eth_dev->data->scattered_rx = 1; + eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; + eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; + } + } + PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n", avp->max_rx_pkt_len, eth_dev->data->dev_conf.rxmode.max_rx_pkt_len, @@ -797,58 +1193,837 @@ avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, return 0; } -static void -avp_dev_rx_queue_release(void *rx_queue) +static inline int +_avp_cmp_ether_addr(struct rte_ether_addr *a, struct rte_ether_addr *b) { - struct avp_queue *rxq = (struct avp_queue *)rx_queue; - struct avp_dev *avp = rxq->avp; - struct rte_eth_dev_data *data = avp->dev_data; - unsigned int i; + uint16_t *_a = (uint16_t *)&a->addr_bytes[0]; + uint16_t *_b = (uint16_t *)&b->addr_bytes[0]; + return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]); +} - for (i = 0; i < avp->num_rx_queues; i++) { - if (data->rx_queues[i] == rxq) - data->rx_queues[i] = NULL; +static inline int +_avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m) +{ + struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + + if (likely(_avp_cmp_ether_addr(&avp->ethaddr, ð->d_addr) == 0)) { + /* allow all packets destined to our address */ + return 0; + } + + if (likely(rte_is_broadcast_ether_addr(ð->d_addr))) { + /* allow all broadcast packets */ + return 0; + } + + if (likely(rte_is_multicast_ether_addr(ð->d_addr))) { + /* allow all multicast packets */ + return 0; + } + + if (avp->flags & AVP_F_PROMISC) { + /* allow all packets when in promiscuous mode */ + return 0; } + + return -1; } -static void -avp_dev_tx_queue_release(void *tx_queue) +#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS +static inline void +__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf) { - struct avp_queue *txq = (struct avp_queue *)tx_queue; - struct avp_dev *avp = txq->avp; - struct rte_eth_dev_data *data = avp->dev_data; + struct rte_avp_desc *first_buf; + struct rte_avp_desc *pkt_buf; + unsigned int pkt_len; + unsigned int nb_segs; + void *pkt_data; unsigned int i; - for (i = 0; i < avp->num_tx_queues; i++) { - if 
(data->tx_queues[i] == txq) - data->tx_queues[i] = NULL; - } + first_buf = avp_dev_translate_buffer(avp, buf); + + i = 0; + pkt_len = 0; + nb_segs = first_buf->nb_segs; + do { + /* Adjust pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + if (pkt_buf == NULL) + rte_panic("bad buffer: segment %u has an invalid address %p\n", + i, buf); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + if (pkt_data == NULL) + rte_panic("bad buffer: segment %u has a NULL data pointer\n", + i); + if (pkt_buf->data_len == 0) + rte_panic("bad buffer: segment %u has 0 data length\n", + i); + pkt_len += pkt_buf->data_len; + nb_segs--; + i++; + + } while (nb_segs && (buf = pkt_buf->next) != NULL); + + if (nb_segs != 0) + rte_panic("bad buffer: expected %u segments found %u\n", + first_buf->nb_segs, (first_buf->nb_segs - nb_segs)); + if (pkt_len != first_buf->pkt_len) + rte_panic("bad buffer: expected length %u found %u\n", + first_buf->pkt_len, pkt_len); } -static int -avp_dev_configure(struct rte_eth_dev *eth_dev) -{ - struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); - struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - struct rte_avp_device_info *host_info; - struct rte_avp_device_config config; - int mask = 0; - void *addr; - int ret; +#define avp_dev_buffer_sanity_check(a, b) \ + __avp_dev_buffer_sanity_check((a), (b)) - addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; - host_info = (struct rte_avp_device_info *)addr; +#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */ - /* Setup required number of queues */ - _avp_set_queue_counts(eth_dev); +#define avp_dev_buffer_sanity_check(a, b) do {} while (0) - mask = (ETH_VLAN_STRIP_MASK | - ETH_VLAN_FILTER_MASK | - ETH_VLAN_EXTEND_MASK); - avp_vlan_offload_set(eth_dev, mask); +#endif - /* update device config */ - memset(&config, 0, sizeof(config)); +/* + * Copy a host buffer chain to a set of mbufs. This function assumes that + * there exactly the required number of mbufs to copy all source bytes. + */ +static inline struct rte_mbuf * +avp_dev_copy_from_buffers(struct avp_dev *avp, + struct rte_avp_desc *buf, + struct rte_mbuf **mbufs, + unsigned int count) +{ + struct rte_mbuf *m_previous = NULL; + struct rte_avp_desc *pkt_buf; + unsigned int total_length = 0; + unsigned int copy_length; + unsigned int src_offset; + struct rte_mbuf *m; + uint16_t ol_flags; + uint16_t vlan_tci; + void *pkt_data; + unsigned int i; + + avp_dev_buffer_sanity_check(avp, buf); + + /* setup the first source buffer */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + total_length = pkt_buf->pkt_len; + src_offset = 0; + + if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { + ol_flags = PKT_RX_VLAN; + vlan_tci = pkt_buf->vlan_tci; + } else { + ol_flags = 0; + vlan_tci = 0; + } + + for (i = 0; (i < count) && (buf != NULL); i++) { + /* fill each destination buffer */ + m = mbufs[i]; + + if (m_previous != NULL) + m_previous->next = m; + + m_previous = m; + + do { + /* + * Copy as many source buffers as will fit in the + * destination buffer. 
+ */ + copy_length = RTE_MIN((avp->guest_mbuf_size - + rte_pktmbuf_data_len(m)), + (pkt_buf->data_len - + src_offset)); + rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), + rte_pktmbuf_data_len(m)), + RTE_PTR_ADD(pkt_data, src_offset), + copy_length); + rte_pktmbuf_data_len(m) += copy_length; + src_offset += copy_length; + + if (likely(src_offset == pkt_buf->data_len)) { + /* need a new source buffer */ + buf = pkt_buf->next; + if (buf != NULL) { + pkt_buf = avp_dev_translate_buffer( + avp, buf); + pkt_data = avp_dev_translate_buffer( + avp, pkt_buf->data); + src_offset = 0; + } + } + + if (unlikely(rte_pktmbuf_data_len(m) == + avp->guest_mbuf_size)) { + /* need a new destination mbuf */ + break; + } + + } while (buf != NULL); + } + + m = mbufs[0]; + m->ol_flags = ol_flags; + m->nb_segs = count; + rte_pktmbuf_pkt_len(m) = total_length; + m->vlan_tci = vlan_tci; + + __rte_mbuf_sanity_check(m, 1); + + return m; +} + +static uint16_t +avp_recv_scattered_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct avp_queue *rxq = (struct avp_queue *)rx_queue; + struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; + struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS]; + struct avp_dev *avp = rxq->avp; + struct rte_avp_desc *pkt_buf; + struct rte_avp_fifo *free_q; + struct rte_avp_fifo *rx_q; + struct rte_avp_desc *buf; + unsigned int count, avail, n; + unsigned int guest_mbuf_size; + struct rte_mbuf *m; + unsigned int required; + unsigned int buf_len; + unsigned int port_id; + unsigned int i; + + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + return 0; + } + + guest_mbuf_size = avp->guest_mbuf_size; + port_id = avp->port_id; + rx_q = avp->rx_q[rxq->queue_id]; + free_q = avp->free_q[rxq->queue_id]; + + /* setup next queue to service */ + rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 
+ (rxq->queue_id + 1) : rxq->queue_base; + + /* determine how many slots are available in the free queue */ + count = avp_fifo_free_count(free_q); + + /* determine how many packets are available in the rx queue */ + avail = avp_fifo_count(rx_q); + + /* determine how many packets can be received */ + count = RTE_MIN(count, avail); + count = RTE_MIN(count, nb_pkts); + count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); + + if (unlikely(count == 0)) { + /* no free buffers, or no buffers on the rx queue */ + return 0; + } + + /* retrieve pending packets */ + n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); + PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", + count, rx_q); + + count = 0; + for (i = 0; i < n; i++) { + /* prefetch next entry while processing current one */ + if (i + 1 < n) { + pkt_buf = avp_dev_translate_buffer(avp, + avp_bufs[i + 1]); + rte_prefetch0(pkt_buf); + } + buf = avp_bufs[i]; + + /* Peek into the first buffer to determine the total length */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + buf_len = pkt_buf->pkt_len; + + /* Allocate enough mbufs to receive the entire packet */ + required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size; + if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) { + rxq->dev_data->rx_mbuf_alloc_failed++; + continue; + } + + /* Copy the data from the buffers to our mbufs */ + m = avp_dev_copy_from_buffers(avp, buf, mbufs, required); + + /* finalize mbuf */ + m->port = port_id; + + if (_avp_mac_filter(avp, m) != 0) { + /* silently discard packets not destined to our MAC */ + rte_pktmbuf_free(m); + continue; + } + + /* return new mbuf to caller */ + rx_pkts[count++] = m; + rxq->bytes += buf_len; + } + + rxq->packets += count; + + /* return the buffers to the free queue */ + avp_fifo_put(free_q, (void **)&avp_bufs[0], n); + + return count; +} + + +static uint16_t +avp_recv_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct avp_queue *rxq = (struct avp_queue *)rx_queue; + struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; + struct avp_dev *avp = rxq->avp; + struct rte_avp_desc *pkt_buf; + struct rte_avp_fifo *free_q; + struct rte_avp_fifo *rx_q; + unsigned int count, avail, n; + unsigned int pkt_len; + struct rte_mbuf *m; + char *pkt_data; + unsigned int i; + + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + return 0; + } + + rx_q = avp->rx_q[rxq->queue_id]; + free_q = avp->free_q[rxq->queue_id]; + + /* setup next queue to service */ + rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 
+ (rxq->queue_id + 1) : rxq->queue_base; + + /* determine how many slots are available in the free queue */ + count = avp_fifo_free_count(free_q); + + /* determine how many packets are available in the rx queue */ + avail = avp_fifo_count(rx_q); + + /* determine how many packets can be received */ + count = RTE_MIN(count, avail); + count = RTE_MIN(count, nb_pkts); + count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); + + if (unlikely(count == 0)) { + /* no free buffers, or no buffers on the rx queue */ + return 0; + } + + /* retrieve pending packets */ + n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); + PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", + count, rx_q); + + count = 0; + for (i = 0; i < n; i++) { + /* prefetch next entry while processing current one */ + if (i < n - 1) { + pkt_buf = avp_dev_translate_buffer(avp, + avp_bufs[i + 1]); + rte_prefetch0(pkt_buf); + } + + /* Adjust host pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + pkt_len = pkt_buf->pkt_len; + + if (unlikely((pkt_len > avp->guest_mbuf_size) || + (pkt_buf->nb_segs > 1))) { + /* + * application should be using the scattered receive + * function + */ + rxq->errors++; + continue; + } + + /* process each packet to be transmitted */ + m = rte_pktmbuf_alloc(avp->pool); + if (unlikely(m == NULL)) { + rxq->dev_data->rx_mbuf_alloc_failed++; + continue; + } + + /* copy data out of the host buffer to our buffer */ + m->data_off = RTE_PKTMBUF_HEADROOM; + rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len); + + /* initialize the local mbuf */ + rte_pktmbuf_data_len(m) = pkt_len; + rte_pktmbuf_pkt_len(m) = pkt_len; + m->port = avp->port_id; + + if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { + m->ol_flags = PKT_RX_VLAN; + m->vlan_tci = pkt_buf->vlan_tci; + } + + if (_avp_mac_filter(avp, m) != 0) { + /* silently discard packets not destined to our MAC */ + rte_pktmbuf_free(m); + continue; + } + + /* return new mbuf to caller */ + rx_pkts[count++] = m; + rxq->bytes += pkt_len; + } + + rxq->packets += count; + + /* return the buffers to the free queue */ + avp_fifo_put(free_q, (void **)&avp_bufs[0], n); + + return count; +} + +/* + * Copy a chained mbuf to a set of host buffers. This function assumes that + * there are sufficient destination buffers to contain the entire source + * packet. 
+ */ +static inline uint16_t +avp_dev_copy_to_buffers(struct avp_dev *avp, + struct rte_mbuf *mbuf, + struct rte_avp_desc **buffers, + unsigned int count) +{ + struct rte_avp_desc *previous_buf = NULL; + struct rte_avp_desc *first_buf = NULL; + struct rte_avp_desc *pkt_buf; + struct rte_avp_desc *buf; + size_t total_length; + struct rte_mbuf *m; + size_t copy_length; + size_t src_offset; + char *pkt_data; + unsigned int i; + + __rte_mbuf_sanity_check(mbuf, 1); + + m = mbuf; + src_offset = 0; + total_length = rte_pktmbuf_pkt_len(m); + for (i = 0; (i < count) && (m != NULL); i++) { + /* fill each destination buffer */ + buf = buffers[i]; + + if (i < count - 1) { + /* prefetch next entry while processing this one */ + pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]); + rte_prefetch0(pkt_buf); + } + + /* Adjust pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, buf); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + + /* setup the buffer chain */ + if (previous_buf != NULL) + previous_buf->next = buf; + else + first_buf = pkt_buf; + + previous_buf = pkt_buf; + + do { + /* + * copy as many source mbuf segments as will fit in the + * destination buffer. + */ + copy_length = RTE_MIN((avp->host_mbuf_size - + pkt_buf->data_len), + (rte_pktmbuf_data_len(m) - + src_offset)); + rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len), + RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), + src_offset), + copy_length); + pkt_buf->data_len += copy_length; + src_offset += copy_length; + + if (likely(src_offset == rte_pktmbuf_data_len(m))) { + /* need a new source buffer */ + m = m->next; + src_offset = 0; + } + + if (unlikely(pkt_buf->data_len == + avp->host_mbuf_size)) { + /* need a new destination buffer */ + break; + } + + } while (m != NULL); + } + + first_buf->nb_segs = count; + first_buf->pkt_len = total_length; + + if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { + first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; + first_buf->vlan_tci = mbuf->vlan_tci; + } + + avp_dev_buffer_sanity_check(avp, buffers[0]); + + return total_length; +} + + +static uint16_t +avp_xmit_scattered_pkts(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST * + RTE_AVP_MAX_MBUF_SEGMENTS)] = {}; + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST]; + struct avp_dev *avp = txq->avp; + struct rte_avp_fifo *alloc_q; + struct rte_avp_fifo *tx_q; + unsigned int count, avail, n; + unsigned int orig_nb_pkts; + struct rte_mbuf *m; + unsigned int required; + unsigned int segments; + unsigned int tx_bytes; + unsigned int i; + + orig_nb_pkts = nb_pkts; + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + /* TODO ... buffer for X packets then drop? 
*/ + txq->errors += nb_pkts; + return 0; + } + + tx_q = avp->tx_q[txq->queue_id]; + alloc_q = avp->alloc_q[txq->queue_id]; + + /* limit the number of transmitted packets to the max burst size */ + if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) + nb_pkts = AVP_MAX_TX_BURST; + + /* determine how many buffers are available to copy into */ + avail = avp_fifo_count(alloc_q); + if (unlikely(avail > (AVP_MAX_TX_BURST * + RTE_AVP_MAX_MBUF_SEGMENTS))) + avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS; + + /* determine how many slots are available in the transmit queue */ + count = avp_fifo_free_count(tx_q); + + /* determine how many packets can be sent */ + nb_pkts = RTE_MIN(count, nb_pkts); + + /* determine how many packets will fit in the available buffers */ + count = 0; + segments = 0; + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + if (likely(i < (unsigned int)nb_pkts - 1)) { + /* prefetch next entry while processing this one */ + rte_prefetch0(tx_pkts[i + 1]); + } + required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / + avp->host_mbuf_size; + + if (unlikely((required == 0) || + (required > RTE_AVP_MAX_MBUF_SEGMENTS))) + break; + else if (unlikely(required + segments > avail)) + break; + segments += required; + count++; + } + nb_pkts = count; + + if (unlikely(nb_pkts == 0)) { + /* no available buffers, or no space on the tx queue */ + txq->errors += orig_nb_pkts; + return 0; + } + + PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", + nb_pkts, tx_q); + + /* retrieve sufficient send buffers */ + n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments); + if (unlikely(n != segments)) { + PMD_TX_LOG(DEBUG, "Failed to allocate buffers " + "n=%u, segments=%u, orig=%u\n", + n, segments, orig_nb_pkts); + txq->errors += orig_nb_pkts; + return 0; + } + + tx_bytes = 0; + count = 0; + for (i = 0; i < nb_pkts; i++) { + /* process each packet to be transmitted */ + m = tx_pkts[i]; + + /* determine how many buffers are required for this packet */ + required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / + avp->host_mbuf_size; + + tx_bytes += avp_dev_copy_to_buffers(avp, m, + &avp_bufs[count], required); + tx_bufs[i] = avp_bufs[count]; + count += required; + + /* free the original mbuf */ + rte_pktmbuf_free(m); + } + + txq->packets += nb_pkts; + txq->bytes += tx_bytes; + +#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS + for (i = 0; i < nb_pkts; i++) + avp_dev_buffer_sanity_check(avp, tx_bufs[i]); +#endif + + /* send the packets */ + n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts); + if (unlikely(n != orig_nb_pkts)) + txq->errors += (orig_nb_pkts - n); + + return n; +} + + +static uint16_t +avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST]; + struct avp_dev *avp = txq->avp; + struct rte_avp_desc *pkt_buf; + struct rte_avp_fifo *alloc_q; + struct rte_avp_fifo *tx_q; + unsigned int count, avail, n; + struct rte_mbuf *m; + unsigned int pkt_len; + unsigned int tx_bytes; + char *pkt_data; + unsigned int i; + + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + /* TODO ... buffer for X packets then drop?! 
*/ + txq->errors++; + return 0; + } + + tx_q = avp->tx_q[txq->queue_id]; + alloc_q = avp->alloc_q[txq->queue_id]; + + /* limit the number of transmitted packets to the max burst size */ + if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) + nb_pkts = AVP_MAX_TX_BURST; + + /* determine how many buffers are available to copy into */ + avail = avp_fifo_count(alloc_q); + + /* determine how many slots are available in the transmit queue */ + count = avp_fifo_free_count(tx_q); + + /* determine how many packets can be sent */ + count = RTE_MIN(count, avail); + count = RTE_MIN(count, nb_pkts); + + if (unlikely(count == 0)) { + /* no available buffers, or no space on the tx queue */ + txq->errors += nb_pkts; + return 0; + } + + PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", + count, tx_q); + + /* retrieve sufficient send buffers */ + n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count); + if (unlikely(n != count)) { + txq->errors++; + return 0; + } + + tx_bytes = 0; + for (i = 0; i < count; i++) { + /* prefetch next entry while processing the current one */ + if (i < count - 1) { + pkt_buf = avp_dev_translate_buffer(avp, + avp_bufs[i + 1]); + rte_prefetch0(pkt_buf); + } + + /* process each packet to be transmitted */ + m = tx_pkts[i]; + + /* Adjust pointers for guest addressing */ + pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); + pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); + pkt_len = rte_pktmbuf_pkt_len(m); + + if (unlikely((pkt_len > avp->guest_mbuf_size) || + (pkt_len > avp->host_mbuf_size))) { + /* + * application should be using the scattered transmit + * function; send it truncated to avoid the performance + * hit of having to manage returning the already + * allocated buffer to the free list. This should not + * happen since the application should have set the + * max_rx_pkt_len based on its MTU and it should be + * policing its own packet sizes. 
+ */ + txq->errors++; + pkt_len = RTE_MIN(avp->guest_mbuf_size, + avp->host_mbuf_size); + } + + /* copy data out of our mbuf and into the AVP buffer */ + rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len); + pkt_buf->pkt_len = pkt_len; + pkt_buf->data_len = pkt_len; + pkt_buf->nb_segs = 1; + pkt_buf->next = NULL; + + if (m->ol_flags & PKT_TX_VLAN_PKT) { + pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; + pkt_buf->vlan_tci = m->vlan_tci; + } + + tx_bytes += pkt_len; + + /* free the original mbuf */ + rte_pktmbuf_free(m); + } + + txq->packets += count; + txq->bytes += tx_bytes; + + /* send the packets */ + n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count); + + return n; +} + +static void +avp_dev_rx_queue_release(void *rx_queue) +{ + struct avp_queue *rxq = (struct avp_queue *)rx_queue; + struct avp_dev *avp = rxq->avp; + struct rte_eth_dev_data *data = avp->dev_data; + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + if (data->rx_queues[i] == rxq) { + rte_free(data->rx_queues[i]); + data->rx_queues[i] = NULL; + } + } +} + +static void +avp_dev_rx_queue_release_all(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_eth_dev_data *data = avp->dev_data; + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + if (data->rx_queues[i]) { + rte_free(data->rx_queues[i]); + data->rx_queues[i] = NULL; + } + } +} + +static void +avp_dev_tx_queue_release(void *tx_queue) +{ + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct avp_dev *avp = txq->avp; + struct rte_eth_dev_data *data = avp->dev_data; + unsigned int i; + + for (i = 0; i < avp->num_tx_queues; i++) { + if (data->tx_queues[i] == txq) { + rte_free(data->tx_queues[i]); + data->tx_queues[i] = NULL; + } + } +} + +static void +avp_dev_tx_queue_release_all(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_eth_dev_data *data = avp->dev_data; + unsigned int i; + + for (i = 0; i < avp->num_tx_queues; i++) { + if (data->tx_queues[i]) { + rte_free(data->tx_queues[i]); + data->tx_queues[i] = NULL; + } + } +} + +static int +avp_dev_configure(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_info *host_info; + struct rte_avp_device_config config; + int mask = 0; + void *addr; + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + ret = -ENOTSUP; + goto unlock; + } + + addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; + host_info = (struct rte_avp_device_info *)addr; + + /* Setup required number of queues */ + _avp_set_queue_counts(eth_dev); + + mask = (ETH_VLAN_STRIP_MASK | + ETH_VLAN_FILTER_MASK | + ETH_VLAN_EXTEND_MASK); + ret = avp_vlan_offload_set(eth_dev, mask); + if (ret < 0) { + PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n", + ret); + goto unlock; + } + + /* update device config */ + memset(&config, 0, sizeof(config)); config.device_id = host_info->device_id; config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; config.driver_version = AVP_DPDK_DRIVER_VERSION; @@ -867,9 +2042,104 @@ avp_dev_configure(struct rte_eth_dev *eth_dev) ret = 0; unlock: + rte_spinlock_unlock(&avp->lock); return ret; } +static int +avp_dev_start(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = 
AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + ret = -ENOTSUP; + goto unlock; + } + + /* update link state */ + ret = avp_dev_ctrl_set_link_state(eth_dev, 1); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", + ret); + goto unlock; + } + + /* remember current link state */ + avp->flags |= AVP_F_LINKUP; + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static void +avp_dev_stop(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + goto unlock; + } + + /* remember current link state */ + avp->flags &= ~AVP_F_LINKUP; + + /* update link state */ + ret = avp_dev_ctrl_set_link_state(eth_dev, 0); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", + ret); + } + +unlock: + rte_spinlock_unlock(&avp->lock); +} + +static void +avp_dev_close(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + goto unlock; + } + + /* remember current link state */ + avp->flags &= ~AVP_F_LINKUP; + avp->flags &= ~AVP_F_CONFIGURED; + + ret = avp_dev_disable_interrupts(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); + /* continue */ + } + + /* update device state */ + ret = avp_dev_ctrl_shutdown(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n", + ret); + /* continue */ + } + + /* release dynamic storage for rx/tx queues */ + avp_dev_rx_queue_release_all(eth_dev); + avp_dev_tx_queue_release_all(eth_dev); + +unlock: + rte_spinlock_unlock(&avp->lock); +} static int avp_dev_link_update(struct rte_eth_dev *eth_dev, @@ -885,15 +2155,44 @@ avp_dev_link_update(struct rte_eth_dev *eth_dev, return -1; } +static int +avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); -static void + rte_spinlock_lock(&avp->lock); + if ((avp->flags & AVP_F_PROMISC) == 0) { + avp->flags |= AVP_F_PROMISC; + PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", + eth_dev->data->port_id); + } + rte_spinlock_unlock(&avp->lock); + + return 0; +} + +static int +avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + + rte_spinlock_lock(&avp->lock); + if ((avp->flags & AVP_F_PROMISC) != 0) { + avp->flags &= ~AVP_F_PROMISC; + PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", + eth_dev->data->port_id); + } + rte_spinlock_unlock(&avp->lock); + + return 0; +} + +static int avp_dev_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *dev_info) { struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - dev_info->driver_name = "rte_avp_pmd"; - dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); dev_info->max_rx_queues = avp->max_rx_queues; dev_info->max_tx_queues = avp->max_tx_queues; dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE; @@ -903,16 +2202,20 @@ avp_dev_info_get(struct rte_eth_dev *eth_dev, dev_info->rx_offload_capa = 
DEV_RX_OFFLOAD_VLAN_STRIP; dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; } + + return 0; } -static void +static int avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) { struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf; + uint64_t offloads = dev_conf->rxmode.offloads; if (mask & ETH_VLAN_STRIP_MASK) { if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { - if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) + if (offloads & DEV_RX_OFFLOAD_VLAN_STRIP) avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD; else avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD; @@ -922,15 +2225,83 @@ avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) } if (mask & ETH_VLAN_FILTER_MASK) { - if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter) + if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER) PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n"); } if (mask & ETH_VLAN_EXTEND_MASK) { - if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend) + if (offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n"); } + + return 0; +} + +static int +avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + struct avp_queue *rxq = avp->dev_data->rx_queues[i]; + + if (rxq) { + stats->ipackets += rxq->packets; + stats->ibytes += rxq->bytes; + stats->ierrors += rxq->errors; + + stats->q_ipackets[i] += rxq->packets; + stats->q_ibytes[i] += rxq->bytes; + stats->q_errors[i] += rxq->errors; + } + } + + for (i = 0; i < avp->num_tx_queues; i++) { + struct avp_queue *txq = avp->dev_data->tx_queues[i]; + + if (txq) { + stats->opackets += txq->packets; + stats->obytes += txq->bytes; + stats->oerrors += txq->errors; + + stats->q_opackets[i] += txq->packets; + stats->q_obytes[i] += txq->bytes; + } + } + + return 0; +} + +static int +avp_dev_stats_reset(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + unsigned int i; + + for (i = 0; i < avp->num_rx_queues; i++) { + struct avp_queue *rxq = avp->dev_data->rx_queues[i]; + + if (rxq) { + rxq->bytes = 0; + rxq->packets = 0; + rxq->errors = 0; + } + } + + for (i = 0; i < avp->num_tx_queues; i++) { + struct avp_queue *txq = avp->dev_data->tx_queues[i]; + + if (txq) { + txq->bytes = 0; + txq->packets = 0; + txq->errors = 0; + } + } + + return 0; } -RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv); +RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd); RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map); +RTE_LOG_REGISTER(avp_logtype_driver, pmd.net.avp.driver, NOTICE);