X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_vhost%2Fvhost_rxtx.c;h=12ce0cc3c204b62a6a1eb92fcae6e83964e39afe;hb=699e3577e6d81980abc22cb7665f4dc71e14fbd1;hp=7026bfa12c12158f947f116563f77da56b4a170b;hpb=5dab2f11370fd9f7d76efa453e7e4277cf33f140;p=dpdk.git diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c index 7026bfa12c..12ce0cc3c2 100644 --- a/lib/librte_vhost/vhost_rxtx.c +++ b/lib/librte_vhost/vhost_rxtx.c @@ -32,15 +32,101 @@ */ #include +#include #include #include #include +#include +#include #include +#include +#include +#include #include "vhost-net.h" #define MAX_PKT_BURST 32 +#define VHOST_LOG_PAGE 4096 + +static inline void __attribute__((always_inline)) +vhost_log_page(uint8_t *log_base, uint64_t page) +{ + log_base[page / 8] |= 1 << (page % 8); +} + +static inline void __attribute__((always_inline)) +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + /* To make sure guest memory updates are committed before logging */ + rte_smp_wmb(); + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + page += 1; + } +} + +static inline void __attribute__((always_inline)) +vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_write(dev, vq->log_guest_addr + offset, len); +} + +static bool +is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb) +{ + return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; +} + +static void +virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) +{ + memset(net_hdr, 0, sizeof(struct virtio_net_hdr)); + + if (m_buf->ol_flags & PKT_TX_L4_MASK) { + net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; + + switch (m_buf->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + net_hdr->csum_offset = (offsetof(struct tcp_hdr, + cksum)); + break; + case PKT_TX_UDP_CKSUM: + net_hdr->csum_offset = (offsetof(struct udp_hdr, + dgram_cksum)); + break; + case PKT_TX_SCTP_CKSUM: + net_hdr->csum_offset = (offsetof(struct sctp_hdr, + cksum)); + break; + } + } + + if (m_buf->ol_flags & PKT_TX_TCP_SEG) { + if (m_buf->ol_flags & PKT_TX_IPV4) + net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else + net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + net_hdr->gso_size = m_buf->tso_segsz; + net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + + m_buf->l4_len; + } + + return; +} /** * This function adds buffers to the virtio devices RX virtqueue. Buffers can @@ -54,8 +140,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; - struct vring_desc *desc; - struct rte_mbuf *buff; + struct vring_desc *desc, *hdr_desc; + struct rte_mbuf *buff, *first_buff; /* The virtio_hdr is initialised to 0. 
*/ struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; uint64_t buff_addr = 0; @@ -68,12 +154,17 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, uint8_t success = 0; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); - if (unlikely(queue_id != VIRTIO_RXQ)) { - LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, + "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", + __func__, dev->device_fh, queue_id); return 0; } - vq = dev->virtqueue[VIRTIO_RXQ]; + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count; /* @@ -117,11 +208,13 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, uint32_t offset = 0, vb_offset = 0; uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0; uint8_t hdr = 0, uncompleted_pkt = 0; + uint16_t idx; /* Get descriptor from available ring */ desc = &vq->desc[head[packet_success]]; buff = pkts[packet_success]; + first_buff = buff; /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */ buff_addr = gpa_to_vva(dev, desc->addr); @@ -130,6 +223,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, /* Copy virtio_hdr to packet and increment buffer address */ buff_hdr_addr = buff_addr; + hdr_desc = desc; /* * If the descriptors are chained the header and data are @@ -154,6 +248,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset), rte_pktmbuf_mtod_offset(buff, const void *, offset), len_to_cpy); + vhost_log_write(dev, desc->addr + vb_offset, len_to_cpy); PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset), len_to_cpy, 0); @@ -188,16 +283,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, } /* Update used ring with desc information */ - vq->used->ring[res_cur_idx & (vq->size - 1)].id = - head[packet_success]; + idx = res_cur_idx & (vq->size - 1); + vq->used->ring[idx].id = head[packet_success]; /* Drop the packet if it is uncompleted */ if (unlikely(uncompleted_pkt == 1)) - vq->used->ring[res_cur_idx & (vq->size - 1)].len = - vq->vhost_hlen; + vq->used->ring[idx].len = vq->vhost_hlen; else - vq->used->ring[res_cur_idx & (vq->size - 1)].len = - pkt_len + vq->vhost_hlen; + vq->used->ring[idx].len = pkt_len + vq->vhost_hlen; + + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[idx]), + sizeof(vq->used->ring[idx])); res_cur_idx++; packet_success++; @@ -205,8 +302,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (unlikely(uncompleted_pkt == 1)) continue; + virtio_enqueue_offload(first_buff, &virtio_hdr.hdr); + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen); + vhost_log_write(dev, hdr_desc->addr, vq->vhost_hlen); PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1); @@ -224,6 +324,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, *(volatile uint16_t *)&vq->used->idx += count; vq->last_used_idx = res_end_idx; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); /* flush used->idx update before we read avail->flags. 
*/ rte_mb(); @@ -235,8 +338,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, } static inline uint32_t __attribute__((always_inline)) -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, - uint16_t res_end_idx, struct rte_mbuf *pkt) +copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t queue_id, + uint16_t res_base_idx, uint16_t res_end_idx, + struct rte_mbuf *pkt) { uint32_t vec_idx = 0; uint32_t entry_success = 0; @@ -252,6 +356,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, uint32_t seg_avail; uint32_t vb_avail; uint32_t cpy_len, entry_len; + uint16_t idx; if (pkt == NULL) return 0; @@ -264,7 +369,8 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, * Convert from gpa to vva * (guest physical addr -> vhost virtual addr) */ - vq = dev->virtqueue[VIRTIO_RXQ]; + vq = dev->virtqueue[queue_id]; + vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); vb_hdr_addr = vb_addr; @@ -276,8 +382,11 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers); + virtio_enqueue_offload(pkt, &virtio_hdr.hdr); + rte_memcpy((void *)(uintptr_t)vb_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen); + vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, vq->vhost_hlen); PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1); @@ -288,16 +397,18 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, entry_len = vq->vhost_hlen; if (vb_avail == 0) { - uint32_t desc_idx = - vq->buf_vec[vec_idx].desc_idx; + uint32_t desc_idx = vq->buf_vec[vec_idx].desc_idx; + + if ((vq->desc[desc_idx].flags & VRING_DESC_F_NEXT) == 0) { + idx = cur_idx & (vq->size - 1); - if ((vq->desc[desc_idx].flags - & VRING_DESC_F_NEXT) == 0) { /* Update used ring with desc information */ - vq->used->ring[cur_idx & (vq->size - 1)].id - = vq->buf_vec[vec_idx].desc_idx; - vq->used->ring[cur_idx & (vq->size - 1)].len - = entry_len; + vq->used->ring[idx].id = vq->buf_vec[vec_idx].desc_idx; + vq->used->ring[idx].len = entry_len; + + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[idx]), + sizeof(vq->used->ring[idx])); entry_len = 0; cur_idx++; @@ -320,6 +431,8 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset), rte_pktmbuf_mtod_offset(pkt, const void *, seg_offset), cpy_len); + vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr + vb_offset, + cpy_len); PRINT_PACKET(dev, (uintptr_t)(vb_addr + vb_offset), @@ -340,10 +453,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags & VRING_DESC_F_NEXT) == 0) { /* Update used ring with desc information */ - vq->used->ring[cur_idx & (vq->size - 1)].id + idx = cur_idx & (vq->size - 1); + vq->used->ring[idx].id = vq->buf_vec[vec_idx].desc_idx; - vq->used->ring[cur_idx & (vq->size - 1)].len - = entry_len; + vq->used->ring[idx].len = entry_len; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[idx]), + sizeof(vq->used->ring[idx])); entry_len = 0; cur_idx++; entry_success++; @@ -376,16 +492,18 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, if ((vq->desc[desc_idx].flags & VRING_DESC_F_NEXT) == 0) { - uint16_t wrapped_idx = - cur_idx & (vq->size - 1); + idx = cur_idx & (vq->size - 1); /* * Update used ring with the * descriptor information */ - vq->used->ring[wrapped_idx].id + 
vq->used->ring[idx].id = desc_idx; - vq->used->ring[wrapped_idx].len + vq->used->ring[idx].len = entry_len; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[idx]), + sizeof(vq->used->ring[idx])); entry_success++; entry_len = 0; cur_idx++; @@ -408,10 +526,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx, * This whole packet completes. */ /* Update used ring with desc information */ - vq->used->ring[cur_idx & (vq->size - 1)].id + idx = cur_idx & (vq->size - 1); + vq->used->ring[idx].id = vq->buf_vec[vec_idx].desc_idx; - vq->used->ring[cur_idx & (vq->size - 1)].len - = entry_len; + vq->used->ring[idx].len = entry_len; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[idx]), + sizeof(vq->used->ring[idx])); entry_success++; break; } @@ -464,11 +585,17 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n", dev->device_fh); - if (unlikely(queue_id != VIRTIO_RXQ)) { - LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, + "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", + __func__, dev->device_fh, queue_id); + return 0; } - vq = dev->virtqueue[VIRTIO_RXQ]; + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); if (count == 0) @@ -490,17 +617,12 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, do { avail_idx = *((volatile uint16_t *)&vq->avail->idx); - if (unlikely(res_cur_idx == avail_idx)) { - LOG_DEBUG(VHOST_DATA, - "(%"PRIu64") Failed " - "to get enough desc from " - "vring\n", - dev->device_fh); + if (unlikely(res_cur_idx == avail_idx)) goto merge_rx_exit; - } else { - update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx); - res_cur_idx++; - } + + update_secure_len(vq, res_cur_idx, + &secure_len, &vec_idx); + res_cur_idx++; } while (pkt_len > secure_len); /* vq->last_used_idx_res is atomically updated. 
*/ @@ -509,8 +631,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, res_cur_idx); } while (success == 0); - entry_success = copy_from_mbuf_to_vring(dev, res_base_idx, - res_cur_idx, pkts[pkt_idx]); + entry_success = copy_from_mbuf_to_vring(dev, queue_id, + res_base_idx, res_cur_idx, pkts[pkt_idx]); rte_compiler_barrier(); @@ -548,6 +670,97 @@ rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, return virtio_dev_rx(dev, queue_id, pkts, count); } +static void +parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) +{ + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + void *l3_hdr = NULL; + struct ether_hdr *eth_hdr; + uint16_t ethertype; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + m->l2_len = sizeof(struct ether_hdr); + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); + + if (ethertype == ETHER_TYPE_VLAN) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + + m->l2_len += sizeof(struct vlan_hdr); + ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); + } + + l3_hdr = (char *)eth_hdr + m->l2_len; + + switch (ethertype) { + case ETHER_TYPE_IPv4: + ipv4_hdr = (struct ipv4_hdr *)l3_hdr; + *l4_proto = ipv4_hdr->next_proto_id; + m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4; + *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV4; + break; + case ETHER_TYPE_IPv6: + ipv6_hdr = (struct ipv6_hdr *)l3_hdr; + *l4_proto = ipv6_hdr->proto; + m->l3_len = sizeof(struct ipv6_hdr); + *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV6; + break; + default: + m->l3_len = 0; + *l4_proto = 0; + break; + } +} + +static inline void __attribute__((always_inline)) +vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) +{ + uint16_t l4_proto = 0; + void *l4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + + parse_ethernet(m, &l4_proto, &l4_hdr); + if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->csum_start == (m->l2_len + m->l3_len)) { + switch (hdr->csum_offset) { + case (offsetof(struct tcp_hdr, cksum)): + if (l4_proto == IPPROTO_TCP) + m->ol_flags |= PKT_TX_TCP_CKSUM; + break; + case (offsetof(struct udp_hdr, dgram_cksum)): + if (l4_proto == IPPROTO_UDP) + m->ol_flags |= PKT_TX_UDP_CKSUM; + break; + case (offsetof(struct sctp_hdr, cksum)): + if (l4_proto == IPPROTO_SCTP) + m->ol_flags |= PKT_TX_SCTP_CKSUM; + break; + default: + break; + } + } + } + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + tcp_hdr = (struct tcp_hdr *)l4_hdr; + m->ol_flags |= PKT_TX_TCP_SEG; + m->tso_segsz = hdr->gso_size; + m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; + break; + default: + RTE_LOG(WARNING, VHOST_DATA, + "unsupported gso type %u.\n", hdr->gso_type); + break; + } + } +} + uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) @@ -556,18 +769,25 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct vhost_virtqueue *vq; struct vring_desc *desc; uint64_t vb_addr = 0; + uint64_t vb_net_hdr_addr = 0; uint32_t head[MAX_PKT_BURST]; uint32_t used_idx; uint32_t i; uint16_t free_entries, entry_success = 0; uint16_t avail_idx; + struct virtio_net_hdr *hdr = NULL; - if (unlikely(queue_id != VIRTIO_TXQ)) { - LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) { + RTE_LOG(ERR, 
VHOST_DATA, + "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", + __func__, dev->device_fh, queue_id); return 0; } - vq = dev->virtqueue[VIRTIO_TXQ]; + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + avail_idx = *((volatile uint16_t *)&vq->avail->idx); /* If there are no available buffers then return. */ @@ -607,6 +827,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, desc = &vq->desc[head[entry_success]]; + vb_net_hdr_addr = gpa_to_vva(dev, desc->addr); + hdr = (struct virtio_net_hdr *)((uintptr_t)vb_net_hdr_addr); + /* Discard first buffer as it is the virtio header */ if (desc->flags & VRING_DESC_F_NEXT) { desc = &vq->desc[desc->next]; @@ -633,6 +856,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, /* Update used index buffer information. */ vq->used->ring[used_idx].id = head[entry_success]; vq->used->ring[used_idx].len = 0; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); /* Allocate an mbuf and populate the structure. */ m = rte_pktmbuf_alloc(mbuf_pool); @@ -745,6 +971,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, break; m->nb_segs = seg_num; + if ((hdr->flags != 0) || (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)) + vhost_dequeue_offload(hdr, m); pkts[entry_success] = m; vq->last_used_idx++; @@ -753,6 +981,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, rte_compiler_barrier(); vq->used->idx += entry_success; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) eventfd_write(vq->callfd, (eventfd_t)1);
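
For readers following the new live-migration logging path (vhost_log_page()/vhost_log_write() in the hunk above), here is a minimal standalone sketch of the same dirty-page bookkeeping: one bit per 4 KiB guest page, byte index page / 8, bit index page % 8. The function name log_dirty_range and the sizes/addresses in main() are made-up example values for illustration, not anything taken from the patch.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VHOST_LOG_PAGE 4096	/* one bit in the log covers one 4 KiB guest page */

/* Mark every page touched by a write of `len` bytes at guest-physical `addr`.
 * Mirrors the loop in vhost_log_write(): byte = page / 8, bit = page % 8. */
static void
log_dirty_range(uint8_t *log_base, uint64_t addr, uint64_t len)
{
	uint64_t page = addr / VHOST_LOG_PAGE;

	while (page * VHOST_LOG_PAGE < addr + len) {
		log_base[page / 8] |= 1 << (page % 8);
		page++;
	}
}

int
main(void)
{
	uint8_t log[64];	/* covers 64 * 8 * 4 KiB = 2 MiB of guest memory */

	memset(log, 0, sizeof(log));
	/* A 100-byte copy that straddles a page boundary dirties two pages. */
	log_dirty_range(log, 2 * VHOST_LOG_PAGE - 50, 100);
	printf("log[0] = 0x%02x\n", log[0]);	/* bits 1 and 2 set -> 0x06 */
	return 0;
}
```

In the patch itself the bitmap lives at dev->log_base, each write is preceded by rte_smp_wmb() so the guest-memory update is visible before its log bit, and addresses beyond dev->log_size are skipped before touching the bitmap.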
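
The new is_valid_virt_queue_idx() check encodes the virtio multiqueue layout: each queue pair owns two rings, the RX ring at the even index and the TX ring at the odd index. A small sketch of that convention follows; vring_idx() is a hypothetical helper added only for illustration and is not part of the patch.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTIO_RXQ  0
#define VIRTIO_TXQ  1
#define VIRTIO_QNUM 2	/* rings per queue pair */

/* Same predicate the patch adds: RX rings live at even indices,
 * TX rings at odd indices, and the index must belong to a configured pair. */
static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
{
	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
}

/* Hypothetical helper (not in the patch): ring index for queue pair qp. */
static uint32_t
vring_idx(uint32_t qp, int is_tx)
{
	return qp * VIRTIO_QNUM + (is_tx ? VIRTIO_TXQ : VIRTIO_RXQ);
}

int
main(void)
{
	uint32_t qp_nb = 2;	/* device configured with two queue pairs */

	/* RX ring of pair 1 is index 2; valid for the enqueue path (is_tx = 0)... */
	printf("idx %u rx-valid: %d\n", vring_idx(1, 0),
	       is_valid_virt_queue_idx(vring_idx(1, 0), 0, qp_nb));
	/* ...but not for the dequeue path (is_tx = 1). */
	printf("idx %u tx-valid: %d\n", vring_idx(1, 0),
	       is_valid_virt_queue_idx(vring_idx(1, 0), 1, qp_nb));
	return 0;
}
```

With this layout, rte_vhost_enqueue_burst() (which fills the guest-visible RX ring) expects an even queue_id and rte_vhost_dequeue_burst() an odd one, which is exactly what the is_tx arguments 0 and 1 enforce in the patch.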
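
The offload plumbing (virtio_enqueue_offload() on the enqueue side, vhost_dequeue_offload() on the dequeue side) translates between mbuf offload fields and the virtio-net header: csum_start is the byte offset where the L4 header begins (l2_len + l3_len) and csum_offset is the position of the checksum field inside that header. Below is a worked example, assuming an untagged Ethernet frame carrying IPv4 without options and TCP; struct tcp_hdr_sketch is a local stand-in with the same field order as struct tcp_hdr in rte_tcp.h, declared here only to keep the example self-contained.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in mirroring the field order of struct tcp_hdr in rte_tcp.h. */
struct tcp_hdr_sketch {
	uint16_t src_port;
	uint16_t dst_port;
	uint32_t sent_seq;
	uint32_t recv_ack;
	uint8_t  data_off;	/* upper nibble: TCP header length in 32-bit words */
	uint8_t  tcp_flags;
	uint16_t rx_win;
	uint16_t cksum;
	uint16_t tcp_urp;
};

int
main(void)
{
	/* Assumed frame: 14-byte Ethernet header, 20-byte IPv4 header, TCP. */
	uint16_t l2_len = 14, l3_len = 20;
	uint16_t csum_start  = l2_len + l3_len;				/* 34 */
	uint16_t csum_offset = offsetof(struct tcp_hdr_sketch, cksum);	/* 16 */
	uint8_t  data_off = 0x50;	/* 5 words -> 20-byte TCP header */

	printf("TCP checksum sits at frame offset %d\n", csum_start + csum_offset);
	/* Same arithmetic the dequeue path uses to recover l4_len for TSO. */
	printf("l4_len = %d\n", (data_off & 0xf0) >> 2);
	return 0;
}
```

On the dequeue side the patch only trusts these fields after checking that csum_start equals l2_len + l3_len and that csum_offset matches the known TCP/UDP/SCTP checksum positions; for GSO packets it recovers tso_segsz from gso_size and l4_len from the TCP data offset as shown above.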