X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvirtio%2Fvirtio_rxtx.c;h=b4c4aa4722b3b458dbce127aa5719b4ff87ebe47;hb=1b437b977552fa5e491b5faaf390f4ed9baeb2cc;hp=9ab441bc34f66f5882bf7e93a9a1325990d134ce;hpb=2d7c37194ee42bf31e401498adbd51dd14a6d6b5;p=dpdk.git diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 9ab441bc34..b4c4aa4722 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -51,6 +51,10 @@ #include #include #include +#include +#include +#include +#include #include "virtio_logs.h" #include "virtio_ethdev.h" @@ -205,18 +209,70 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) return 0; } +/* When doing TSO, the IP length is not included in the pseudo header + * checksum of the packet given to the PMD, but for virtio it is + * expected. + */ +static void +virtio_tso_fix_cksum(struct rte_mbuf *m) +{ + /* common case: header is not fragmented */ + if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + + m->l4_len)) { + struct ipv4_hdr *iph; + struct ipv6_hdr *ip6h; + struct tcp_hdr *th; + uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; + uint32_t tmp; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + th = RTE_PTR_ADD(iph, m->l3_len); + if ((iph->version_ihl >> 4) == 4) { + iph->hdr_checksum = 0; + iph->hdr_checksum = rte_ipv4_cksum(iph); + ip_len = iph->total_length; + ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - + m->l3_len); + } else { + ip6h = (struct ipv6_hdr *)iph; + ip_paylen = ip6h->payload_len; + } + + /* calculate the new phdr checksum not including ip_paylen */ + prev_cksum = th->cksum; + tmp = prev_cksum; + tmp += ip_paylen; + tmp = (tmp & 0xffff) + (tmp >> 16); + new_cksum = tmp; + + /* replace it in the packet */ + th->cksum = new_cksum; + } +} + +static inline int +tx_offload_enabled(struct virtio_hw *hw) +{ + return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || + vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || + vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); +} + static inline void virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, uint16_t needed, int use_indirect, int can_push) { + struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; struct vq_desc_extra *dxp; struct virtqueue *vq = txvq->vq; struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; uint16_t head_idx, idx; uint16_t head_size = vq->hw->vtnet_hdr_size; - unsigned long offs; + struct virtio_net_hdr *hdr; + int offload; + offload = tx_offload_enabled(vq->hw); head_idx = vq->vq_desc_head_idx; idx = head_idx; dxp = &vq->vq_descx[idx]; @@ -226,10 +282,12 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, start_dp = vq->vq_ring.desc; if (can_push) { - /* put on zero'd transmit header (no offloads) */ - void *hdr = rte_pktmbuf_prepend(cookie, head_size); - - memset(hdr, 0, head_size); + /* prepend cannot fail, checked by caller */ + hdr = (struct virtio_net_hdr *) + rte_pktmbuf_prepend(cookie, head_size); + /* if offload disabled, it is not zeroed below, do it now */ + if (offload == 0) + memset(hdr, 0, head_size); } else if (use_indirect) { /* setup tx ring slot to point to indirect * descriptor list stored in reserved region. @@ -237,14 +295,11 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, * the first slot in indirect ring is already preset * to point to the header in reserved region */ - struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; - - offs = idx * sizeof(struct virtio_tx_region) - + offsetof(struct virtio_tx_region, tx_indir); - - start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].addr = txvq->virtio_net_hdr_mem + + RTE_PTR_DIFF(&txr[idx].tx_indir, txr); start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); start_dp[idx].flags = VRING_DESC_F_INDIRECT; + hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; /* loop below will fill in rest of the indirect elements */ start_dp = txr[idx].tx_indir; @@ -253,15 +308,59 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, /* setup first tx ring slot to point to header * stored in reserved region. */ - offs = idx * sizeof(struct virtio_tx_region) - + offsetof(struct virtio_tx_region, tx_hdr); - - start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].addr = txvq->virtio_net_hdr_mem + + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); start_dp[idx].len = vq->hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_NEXT; + hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; + idx = start_dp[idx].next; } + /* Checksum Offload / TSO */ + if (offload) { + if (cookie->ol_flags & PKT_TX_TCP_SEG) + cookie->ol_flags |= PKT_TX_TCP_CKSUM; + + switch (cookie->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_UDP_CKSUM: + hdr->csum_start = cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct udp_hdr, + dgram_cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + case PKT_TX_TCP_CKSUM: + hdr->csum_start = cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct tcp_hdr, cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + default: + hdr->csum_start = 0; + hdr->csum_offset = 0; + hdr->flags = 0; + break; + } + + /* TCP Segmentation Offload */ + if (cookie->ol_flags & PKT_TX_TCP_SEG) { + virtio_tso_fix_cksum(cookie); + hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? + VIRTIO_NET_HDR_GSO_TCPV6 : + VIRTIO_NET_HDR_GSO_TCPV4; + hdr->gso_size = cookie->tso_segsz; + hdr->hdr_len = + cookie->l2_len + + cookie->l3_len + + cookie->l4_len; + } else { + hdr->gso_type = 0; + hdr->gso_size = 0; + hdr->hdr_len = 0; + } + } + do { start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); start_dp[idx].len = cookie->data_len; @@ -525,11 +624,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, PMD_INIT_FUNC_TRACE(); - if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) - != ETH_TXQ_FLAGS_NOXSUMS) { - PMD_INIT_LOG(ERR, "TX checksum offload not supported\n"); - return -EINVAL; - } virtio_update_rxtx_handler(dev, tx_conf); @@ -632,6 +726,86 @@ virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) } } +/* Optionally fill offload information in structure */ +static int +virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) +{ + struct rte_net_hdr_lens hdr_lens; + uint32_t hdrlen, ptype; + int l4_supported = 0; + + /* nothing to do */ + if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) + return 0; + + m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; + + ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); + m->packet_type = ptype; + if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) + l4_supported = 1; + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; + if (hdr->csum_start <= hdrlen && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_NONE; + } else { + /* Unknown proto or tunnel, do sw cksum. We can assume + * the cksum field is in the first segment since the + * buffers we provided to the host are large enough. + * In case of SCTP, this will be wrong since it's a CRC + * but there's nothing we can do. + */ + uint16_t csum, off; + + rte_raw_cksum_mbuf(m, hdr->csum_start, + rte_pktmbuf_pkt_len(m) - hdr->csum_start, + &csum); + if (likely(csum != 0xffff)) + csum = ~csum; + off = hdr->csum_offset + hdr->csum_start; + if (rte_pktmbuf_data_len(m) >= off + 1) + *rte_pktmbuf_mtod_offset(m, uint16_t *, + off) = csum; + } + } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + /* GSO request, save required information in mbuf */ + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + /* Check unsupported modes */ + if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) || + (hdr->gso_size == 0)) { + return -EINVAL; + } + + /* Update mss lengthes in mbuf */ + m->tso_segsz = hdr->gso_size; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + m->ol_flags |= PKT_RX_LRO | \ + PKT_RX_L4_CKSUM_NONE; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static inline int +rx_offload_enabled(struct virtio_hw *hw) +{ + return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || + vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || + vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); +} + #define VIRTIO_MBUF_BURST_SZ 64 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) uint16_t @@ -647,6 +821,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) int error; uint32_t i, nb_enqueued; uint32_t hdr_size; + int offload; + struct virtio_net_hdr *hdr; nb_used = VIRTQUEUE_NUSED(vq); @@ -664,6 +840,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) nb_rx = 0; nb_enqueued = 0; hdr_size = hw->vtnet_hdr_size; + offload = rx_offload_enabled(hw); for (i = 0; i < num ; i++) { rxm = rcv_pkts[i]; @@ -688,9 +865,18 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); + hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr + + RTE_PKTMBUF_HEADROOM - hdr_size); + if (hw->vlan_strip) rte_vlan_strip(rxm); + if (offload && virtio_rx_offload(rxm, hdr) < 0) { + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + VIRTIO_DUMP_PACKET(rxm, rxm->data_len); rx_pkts[nb_rx++] = rxm; @@ -750,6 +936,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint16_t extra_idx; uint32_t seg_res; uint32_t hdr_size; + int offload; nb_used = VIRTQUEUE_NUSED(vq); @@ -765,6 +952,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, extra_idx = 0; seg_res = 0; hdr_size = hw->vtnet_hdr_size; + offload = rx_offload_enabled(hw); while (i < nb_used) { struct virtio_net_hdr_mrg_rxbuf *header; @@ -810,6 +998,12 @@ virtio_recv_mergeable_pkts(void *rx_queue, rx_pkts[nb_rx] = rxm; prev = rxm; + if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) { + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + seg_res = seg_num - 1; while (seg_res != 0) {