X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=003eefdd40be0f2d482d21e78275cf8150e99e79;hb=be39124e5b708bbf6ffe9c870ea3a6a6156e15b0;hp=aac35b92e14b3a841c819188d2f47da7e5f46e21;hpb=18a1c20044c09c35b975fae3bda2352668557163;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index aac35b92e1..003eefdd40 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -100,7 +100,8 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); static __rte_always_inline void -mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx); +mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, + const unsigned int strd_n); static int mlx5_queue_state_modify(struct rte_eth_dev *dev, @@ -756,7 +757,8 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) scat = &((volatile struct mlx5_wqe_mprq *) rxq->wqes)[i].dseg; - addr = (uintptr_t)mlx5_mprq_buf_addr(buf); + addr = (uintptr_t)mlx5_mprq_buf_addr(buf, + 1 << rxq->strd_num_n); byte_count = (1 << rxq->strd_sz_n) * (1 << rxq->strd_num_n); } else { @@ -788,7 +790,7 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) } /** - * Modify a Verbs queue state. + * Modify a Verbs/DevX queue state. * This must be called from the primary process. * * @param dev @@ -807,15 +809,34 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, struct mlx5_priv *priv = dev->data->dev_private; if (sm->is_wq) { - struct ibv_wq_attr mod = { - .attr_mask = IBV_WQ_ATTR_STATE, - .wq_state = sm->state, - }; struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; struct mlx5_rxq_ctrl *rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); - ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); + if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { + struct ibv_wq_attr mod = { + .attr_mask = IBV_WQ_ATTR_STATE, + .wq_state = sm->state, + }; + + ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod); + } else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */ + struct mlx5_devx_modify_rq_attr rq_attr; + + memset(&rq_attr, 0, sizeof(rq_attr)); + if (sm->state == IBV_WQS_RESET) { + rq_attr.rq_state = MLX5_RQC_STATE_ERR; + rq_attr.state = MLX5_RQC_STATE_RST; + } else if (sm->state == IBV_WQS_RDY) { + rq_attr.rq_state = MLX5_RQC_STATE_RST; + rq_attr.state = MLX5_RQC_STATE_RDY; + } else if (sm->state == IBV_WQS_ERR) { + rq_attr.rq_state = MLX5_RQC_STATE_RDY; + rq_attr.state = MLX5_RQC_STATE_ERR; + } + ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, + &rq_attr); + } if (ret) { DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n", sm->state, strerror(errno)); @@ -1353,6 +1374,101 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) return i; } +/** + * Update LRO packet TCP header. + * The HW LRO feature doesn't update the TCP header after coalescing the + * TCP segments but supplies information in CQE to fill it by SW. + * + * @param tcp + * Pointer to the TCP header. + * @param cqe + * Pointer to the completion entry.. + * @param phcsum + * The L3 pseudo-header checksum. + */ +static inline void +mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, + volatile struct mlx5_cqe *restrict cqe, + uint32_t phcsum) +{ + uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & + MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; + /* + * The HW calculates only the TCP payload checksum, need to complete + * the TCP header checksum and the L3 pseudo-header checksum. + */ + uint32_t csum = phcsum + cqe->csum; + + if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || + l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { + tcp->tcp_flags |= RTE_TCP_ACK_FLAG; + tcp->recv_ack = cqe->lro_ack_seq_num; + tcp->rx_win = cqe->lro_tcp_win; + } + if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) + tcp->tcp_flags |= RTE_TCP_PSH_FLAG; + tcp->cksum = 0; + csum += rte_raw_cksum(tcp, (tcp->data_off & 0xF) * 4); + csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); + csum = (~csum) & 0xffff; + if (csum == 0) + csum = 0xffff; + tcp->cksum = csum; +} + +/** + * Update LRO packet headers. + * The HW LRO feature doesn't update the L3/TCP headers after coalescing the + * TCP segments but supply information in CQE to fill it by SW. + * + * @param padd + * The packet address. + * @param cqe + * Pointer to the completion entry.. + * @param len + * The packet length. + */ +static inline void +mlx5_lro_update_hdr(uint8_t *restrict padd, + volatile struct mlx5_cqe *restrict cqe, + uint32_t len) +{ + union { + struct rte_ether_hdr *eth; + struct rte_vlan_hdr *vlan; + struct rte_ipv4_hdr *ipv4; + struct rte_ipv6_hdr *ipv6; + struct rte_tcp_hdr *tcp; + uint8_t *hdr; + } h = { + .hdr = padd, + }; + uint16_t proto = h.eth->ether_type; + uint32_t phcsum; + + h.eth++; + while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || + proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { + proto = h.vlan->eth_proto; + h.vlan++; + } + if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { + h.ipv4->time_to_live = cqe->lro_min_ttl; + h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); + h.ipv4->hdr_checksum = 0; + h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); + phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); + h.ipv4++; + } else { + h.ipv6->hop_limits = cqe->lro_min_ttl; + h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - + sizeof(*h.ipv6)); + phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); + h.ipv6++; + } + mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); +} + void mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) { @@ -1373,7 +1489,8 @@ mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) } static inline void -mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx) +mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, + const unsigned int strd_n) { struct mlx5_mprq_buf *rep = rxq->mprq_repl; volatile struct mlx5_wqe_data_seg *wqe = @@ -1384,7 +1501,7 @@ mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx) /* Replace MPRQ buf. */ (*rxq->mprq_bufs)[rq_idx] = rep; /* Replace WQE. */ - addr = mlx5_mprq_buf_addr(rep); + addr = mlx5_mprq_buf_addr(rep, strd_n); wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); /* If there's only one MR, no need to replace LKey in WQE. */ if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) @@ -1423,6 +1540,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; uint32_t rq_ci = rxq->rq_ci; uint16_t consumed_strd = rxq->consumed_strd; + uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM; struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; while (i < pkts_n) { @@ -1436,11 +1554,12 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) uint32_t byte_cnt; volatile struct mlx5_mini_cqe8 *mcqe = NULL; uint32_t rss_hash_res = 0; + uint8_t lro_num_seg; if (consumed_strd == strd_n) { /* Replace WQE only if the buffer is still in use. */ if (rte_atomic16_read(&buf->refcnt) > 1) { - mprq_buf_replace(rxq, rq_ci & wq_mask); + mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n); /* Release the old buffer. */ mlx5_mprq_buf_free(buf); } else if (unlikely(rxq->mprq_repl == NULL)) { @@ -1481,6 +1600,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } assert(strd_idx < strd_n); assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); + lro_num_seg = cqe->lro_num_seg; /* * Currently configured to receive a packet per a stride. But if * MTU is adjusted through kernel interface, device could @@ -1488,7 +1608,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) * case, the packet should be dropped because it is bigger than * the max_rx_pkt_len. */ - if (unlikely(strd_cnt > 1)) { + if (unlikely(!lro_num_seg && strd_cnt > 1)) { ++rxq->stats.idropped; continue; } @@ -1502,9 +1622,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) if (rxq->crc_present) len -= RTE_ETHER_CRC_LEN; offset = strd_idx * strd_sz + strd_shift; - addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset); - /* Initialize the offload flag. */ - pkt->ol_flags = 0; + addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); /* * Memcpy packets to the target mbuf if: * - The size of packet is smaller than mprq_max_memcpy_len. @@ -1521,34 +1639,37 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) continue; } rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len); + DATA_LEN(pkt) = len; } else { rte_iova_t buf_iova; struct rte_mbuf_ext_shared_info *shinfo; uint16_t buf_len = strd_cnt * strd_sz; + void *buf_addr; /* Increment the refcnt of the whole chunk. */ rte_atomic16_add_return(&buf->refcnt, 1); assert((uint16_t)rte_atomic16_read(&buf->refcnt) <= strd_n + 1); - addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); + buf_addr = RTE_PTR_SUB(addr, headroom_sz); /* * MLX5 device doesn't use iova but it is necessary in a * case where the Rx packet is transmitted via a * different PMD. */ buf_iova = rte_mempool_virt2iova(buf) + - RTE_PTR_DIFF(addr, buf); - shinfo = rte_pktmbuf_ext_shinfo_init_helper(addr, - &buf_len, mlx5_mprq_buf_free_cb, buf); + RTE_PTR_DIFF(buf_addr, buf); + shinfo = &buf->shinfos[strd_idx]; + rte_mbuf_ext_refcnt_set(shinfo, 1); /* * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when * attaching the stride to mbuf and more offload flags * will be added below by calling rxq_cq_to_mbuf(). * Other fields will be overwritten. */ - rte_pktmbuf_attach_extbuf(pkt, addr, buf_iova, buf_len, - shinfo); - rte_pktmbuf_reset_headroom(pkt); + rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, + buf_len, shinfo); + /* Set mbuf head-room. */ + pkt->data_off = headroom_sz; assert(pkt->ol_flags == EXT_ATTACHED_MBUF); /* * Prevent potential overflow due to MTU change through @@ -1559,10 +1680,34 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) ++rxq->stats.idropped; continue; } + DATA_LEN(pkt) = len; + /* + * LRO packet may consume all the stride memory, in this + * case packet head-room space is not guaranteed so must + * to add an empty mbuf for the head-room. + */ + if (!rxq->strd_headroom_en) { + struct rte_mbuf *headroom_mbuf = + rte_pktmbuf_alloc(rxq->mp); + + if (unlikely(headroom_mbuf == NULL)) { + rte_pktmbuf_free_seg(pkt); + ++rxq->stats.rx_nombuf; + break; + } + PORT(pkt) = rxq->port_id; + NEXT(headroom_mbuf) = pkt; + pkt = headroom_mbuf; + NB_SEGS(pkt) = 2; + } } rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); + if (lro_num_seg > 1) { + mlx5_lro_update_hdr(addr, cqe, len); + pkt->ol_flags |= PKT_RX_LRO; + pkt->tso_segsz = strd_sz; + } PKT_LEN(pkt) = len; - DATA_LEN(pkt) = len; PORT(pkt) = rxq->port_id; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment bytes counter. */