unsigned int i;
uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
mlx5_rx_burst;
+ unsigned int max_frame_len;
+ int rehash;
+ int restart = priv->started;
if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
goto out;
} else
DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
- priv->mtu = mtu;
/* Temporarily replace RX handler with a fake one, assuming it has not
* been copied elsewhere. */
dev->rx_pkt_burst = removed_rx_burst;
/* Make sure everyone has left dev->rx_pkt_burst() and uses
 * removed_rx_burst() instead. */
rte_wmb();
usleep(1000);
+ /* MTU does not include header and CRC. */
+ max_frame_len = ETHER_HDR_LEN + mtu + ETHER_CRC_LEN;
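+ /* For illustration: a standard 1500-byte MTU maps to a 1518-byte
+ * frame (14-byte Ethernet header + 4-byte CRC), while a 9000-byte
+ * jumbo MTU maps to 9018 bytes. */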
+ /* Check if at least one queue is going to need an SGE update. */
+ for (i = 0; i != priv->rxqs_n; ++i) {
+ struct rxq *rxq = (*priv->rxqs)[i];
+ unsigned int mb_len;
+ unsigned int size = RTE_PKTMBUF_HEADROOM + max_frame_len;
+ unsigned int sges_n;
+
+ if (rxq == NULL)
+ continue;
+ mb_len = rte_pktmbuf_data_room_size(rxq->mp);
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = log2above((size / mb_len) + !!(size % mb_len));
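+ /*
+ * Worked example, assuming the common 2048-byte data room plus
+ * 128-byte headroom (mb_len = 2176) and a 9000-byte MTU: size is
+ * 128 + 9018 = 9146 bytes, which needs 5 buffers, rounded up to
+ * 8 SGEs (sges_n = 3).
+ */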
+ if (sges_n != rxq->sges_n)
+ break;
+ }
+ /*
+ * If all queues already have the right number of SGEs, a simple rehash
+ * of their buffers is enough; otherwise the SGE configuration of a
+ * queue can only be changed by recreating it. In that case all
+ * resources that depend on queues (flows, indirection tables) must be
+ * recreated as well.
+ */
+ rehash = (i == priv->rxqs_n);
+ if (!rehash) {
+ /* Clean up everything as with mlx5_dev_stop(). */
+ priv_special_flow_disable_all(priv);
+ priv_mac_addrs_disable(priv);
+ priv_destroy_hash_rxqs(priv);
+ priv_fdir_disable(priv);
+ priv_dev_interrupt_handler_uninstall(priv, dev);
+ }
+recover:
/* Reconfigure each RX queue. */
for (i = 0; (i != priv->rxqs_n); ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
- unsigned int mb_len;
- unsigned int max_frame_len;
+ struct rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct rxq_ctrl, rxq);
int sp;
+ unsigned int mb_len;
+ unsigned int tmp;
if (rxq == NULL)
continue;
- /* Calculate new maximum frame length according to MTU and
- * toggle scattered support (sp) if necessary. */
- max_frame_len = (priv->mtu + ETHER_HDR_LEN +
- (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
mb_len = rte_pktmbuf_data_room_size(rxq->mp);
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ /* Toggle scattered support (sp) if necessary. */
sp = (max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM));
- if (sp) {
- ERROR("%p: RX scatter is not supported", (void *)dev);
- ret = ENOTSUP;
- goto out;
+ /* Provide new values to rxq_setup(). */
+ dev->data->dev_conf.rxmode.jumbo_frame = sp;
+ dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
+ if (rehash)
+ ret = rxq_rehash(dev, rxq_ctrl);
+ else
+ ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n,
+ rxq_ctrl->socket, NULL, rxq->mp);
+ if (!ret)
+ continue;
+ /* Attempt to roll back in case of error. */
+ tmp = (mb_len << rxq->sges_n) - RTE_PKTMBUF_HEADROOM;
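+ /*
+ * tmp is the largest frame the queue can still hold with its current
+ * SGE layout (full buffer chain minus headroom); e.g. with mb_len =
+ * 2176 and sges_n = 2 this is 4 * 2176 - 128 = 8576 bytes. Retrying
+ * with that value keeps the queue usable when the new size failed.
+ */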
+ if (max_frame_len != tmp) {
+ max_frame_len = tmp;
+ goto recover;
}
+ /* Double fault, disable RX. */
+ break;
+ }
+ /*
+ * Use a safe RX burst function in case of error, otherwise mimic
+ * mlx5_dev_start().
+ */
+ if (ret) {
+ ERROR("unable to reconfigure RX queues, RX disabled");
+ rx_func = removed_rx_burst;
+ } else if (restart &&
+ !rehash &&
+ !priv_create_hash_rxqs(priv) &&
+ !priv_rehash_flows(priv)) {
+ if (dev->data->dev_conf.fdir_conf.mode == RTE_FDIR_MODE_NONE)
+ priv_fdir_enable(priv);
+ priv_dev_interrupt_handler_install(priv, dev);
}
+ priv->mtu = mtu;
/* Burst functions can now be called again. */
rte_wmb();
dev->rx_pkt_burst = rx_func;
rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
struct rte_mbuf *(*pool)[])
{
+ const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
unsigned int i;
int ret = 0;
- /* For each WR (packet). */
+ /* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
struct rte_mbuf *buf;
volatile struct mlx5_wqe_data_seg *scat =
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
assert(!buf->next);
+ /* Only the first segment keeps headroom. */
+ if (i % sges_n)
+ SET_DATA_OFF(buf, 0);
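+ /* This is what lets a chain of 2^sges_n buffers hold up to
+ * (mb_len << sges_n) - RTE_PKTMBUF_HEADROOM bytes of packet data. */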
PORT(buf) = rxq_ctrl->rxq.port_id;
DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
PKT_LEN(buf) = DATA_LEN(buf);
};
(*rxq_ctrl->rxq.elts)[i] = buf;
}
- DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq_ctrl, elts_n);
+ DEBUG("%p: allocated and configured %u segments (max %u packets)",
+ (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
assert(ret == 0);
return 0;
error:
struct ibv_exp_wq_attr mod;
int err;
- DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
+ DEBUG("%p: rehashing queue %p with %u SGE(s) per packet",
+ (void *)dev, (void *)rxq_ctrl, 1 << rxq_ctrl->rxq.sges_n);
+ assert(!(elts_n % (1 << rxq_ctrl->rxq.sges_n)));
/* From now on, any failure will render the queue unusable.
* Reinitialize WQ. */
mod = (struct ibv_exp_wq_attr){
goto error;
}
/* Update doorbell counter. */
- rxq_ctrl->rxq.rq_ci = elts_n;
+ rxq_ctrl->rxq.rq_ci = elts_n >> rxq_ctrl->rxq.sges_n;
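+ /* rq_ci counts strides of 2^sges_n segments; e.g. 512 elements with
+ * sges_n = 2 translate to 128 strides of 4 SGEs each. */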
rte_wmb();
*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
error:
int ret = 0;
(void)conf; /* Thresholds configuration (ignored). */
- if (desc == 0) {
- ERROR("%p: invalid number of RX descriptors (must be a"
- " multiple of 2)", (void *)dev);
+ /* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
+ (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+ (mb_len - RTE_PKTMBUF_HEADROOM))) {
+ unsigned int size =
+ RTE_PKTMBUF_HEADROOM +
+ dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ unsigned int sges_n;
+
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = log2above((size / mb_len) + !!(size % mb_len));
+ tmpl.rxq.sges_n = sges_n;
+ /* Make sure rxq.sges_n did not overflow. */
+ size = mb_len * (1 << tmpl.rxq.sges_n);
+ size -= RTE_PKTMBUF_HEADROOM;
+ if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+ ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ return EOVERFLOW;
+ }
+ }
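+ /*
+ * Example of the overflow case, assuming 2176-byte mbufs: the 2-bit
+ * sges_n field caps a packet at 8 * 2176 - 128 = 17280 bytes, so any
+ * larger max_rx_pkt_len ends up returning EOVERFLOW above.
+ */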
+ DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << tmpl.rxq.sges_n);
+ if (desc % (1 << tmpl.rxq.sges_n)) {
+ ERROR("%p: number of RX queue descriptors (%u) is not a"
+ " multiple of SGEs per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << tmpl.rxq.sges_n);
return EINVAL;
}
/* Toggle RX checksum offload if hardware supports it. */
if (priv->hw_csum_l2tun)
tmpl.rxq.csum_l2tun =
!!dev->data->dev_conf.rxmode.hw_ip_checksum;
- (void)mb_len; /* I'll be back! */
/* Use the entire RX mempool as the memory region. */
tmpl.mr = mlx5_mp2mr(priv->pd, mp);
if (tmpl.mr == NULL) {
.wq_context = NULL, /* Could be useful in the future. */
.wq_type = IBV_EXP_WQT_RQ,
/* Max number of outstanding WRs. */
- .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)desc) ?
- priv->device_attr.max_qp_wr :
- (int)desc),
+ .max_recv_wr = desc >> tmpl.rxq.sges_n,
/* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1,
+ .max_recv_sge = 1 << tmpl.rxq.sges_n,
.pd = priv->pd,
.cq = tmpl.cq,
.comp_mask =
(void *)dev, strerror(ret));
goto error;
}
+ /*
+ * Make sure the number of WRs*SGEs matches expectations since a queue
+ * cannot allocate more than "desc" buffers.
+ */
+ if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) ||
+ ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) {
+ ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+ (void *)dev,
+ (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
+ attr.wq.max_recv_wr, attr.wq.max_recv_sge);
+ ret = EINVAL;
+ goto error;
+ }
/* Save port ID. */
tmpl.rxq.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
tmpl.rxq.elts = elts;
*rxq_ctrl = tmpl;
/* Update doorbell counter. */
- rxq_ctrl->rxq.rq_ci = desc;
+ rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
rte_wmb();
*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct rxq *rxq = dpdk_rxq;
- unsigned int pkts_ret = 0;
- unsigned int i;
- unsigned int rq_ci = rxq->rq_ci;
- const unsigned int elts_n = rxq->elts_n;
- const unsigned int wqe_cnt = elts_n - 1;
+ const unsigned int wqe_cnt = rxq->elts_n - 1;
const unsigned int cqe_cnt = rxq->cqe_n - 1;
+ const unsigned int sges_n = rxq->sges_n;
+ struct rte_mbuf *pkt = NULL;
+ struct rte_mbuf *seg = NULL;
+ volatile struct mlx5_cqe64 *cqe =
+ &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
+ unsigned int i = 0;
+ unsigned int rq_ci = rxq->rq_ci << sges_n;
+ int len;
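+ /*
+ * Within this function rq_ci indexes individual segments (WQEs),
+ * while rxq->rq_ci counts strides of 2^sges_n segments, hence the
+ * shifts on entry and before writing it back.
+ */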
- for (i = 0; (i != pkts_n); ++i) {
+ while (pkts_n) {
unsigned int idx = rq_ci & wqe_cnt;
- int len;
- struct rte_mbuf *rep;
- struct rte_mbuf *pkt;
volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
- volatile struct mlx5_cqe64 *cqe =
- &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
+ struct rte_mbuf *rep = (*rxq->elts)[idx];
- pkt = (*rxq->elts)[idx];
+ if (pkt)
+ NEXT(seg) = rep;
+ seg = rep;
+ rte_prefetch0(seg);
rte_prefetch0(cqe);
+ rte_prefetch0(wqe);
rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
+ while (pkt) {
+ seg = NEXT(pkt);
+ rte_mbuf_refcnt_set(pkt, 0);
+ __rte_mbuf_raw_free(pkt);
+ pkt = seg;
+ }
++rxq->stats.rx_nombuf;
break;
}
- SET_DATA_OFF(rep, RTE_PKTMBUF_HEADROOM);
- NB_SEGS(rep) = 1;
- PORT(rep) = rxq->port_id;
- NEXT(rep) = NULL;
- len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt);
- if (unlikely(len == 0)) {
- rte_mbuf_refcnt_set(rep, 0);
- __rte_mbuf_raw_free(rep);
- break;
- }
- if (unlikely(len == -1)) {
- /* RX error, packet is likely too large. */
- rte_mbuf_refcnt_set(rep, 0);
- __rte_mbuf_raw_free(rep);
- ++rxq->stats.idropped;
- --i;
- goto skip;
+ if (!pkt) {
+ cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
+ len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt);
+ if (len == 0) {
+ rte_mbuf_refcnt_set(rep, 0);
+ __rte_mbuf_raw_free(rep);
+ break;
+ }
+ if (unlikely(len == -1)) {
+ /* RX error, packet is likely too large. */
+ rte_mbuf_refcnt_set(rep, 0);
+ __rte_mbuf_raw_free(rep);
+ ++rxq->stats.idropped;
+ goto skip;
+ }
+ pkt = seg;
+ assert(len >= (rxq->crc_present << 2));
+ /* Update packet information. */
+ if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
+ rxq->crc_present) {
+ if (rxq->csum) {
+ pkt->packet_type =
+ rxq_cq_to_pkt_type(cqe);
+ pkt->ol_flags =
+ rxq_cq_to_ol_flags(rxq, cqe);
+ }
+ if (cqe->l4_hdr_type_etc &
+ MLX5_CQE_VLAN_STRIPPED) {
+ pkt->ol_flags |= PKT_RX_VLAN_PKT |
+ PKT_RX_VLAN_STRIPPED;
+ pkt->vlan_tci = ntohs(cqe->vlan_info);
+ }
+ if (rxq->crc_present)
+ len -= ETHER_CRC_LEN;
+ }
+ PKT_LEN(pkt) = len;
}
+ DATA_LEN(rep) = DATA_LEN(seg);
+ PKT_LEN(rep) = PKT_LEN(seg);
+ SET_DATA_OFF(rep, DATA_OFF(seg));
+ NB_SEGS(rep) = NB_SEGS(seg);
+ PORT(rep) = PORT(seg);
+ NEXT(rep) = NULL;
+ (*rxq->elts)[idx] = rep;
/*
* Fill NIC descriptor with the new buffer. The lkey and size
* of the buffers are already known, only the buffer address
* changes.
*/
- wqe->addr = htonll((uintptr_t)rep->buf_addr +
- RTE_PKTMBUF_HEADROOM);
- (*rxq->elts)[idx] = rep;
- /* Update pkt information. */
- if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
- rxq->crc_present) {
- if (rxq->csum) {
- pkt->packet_type = rxq_cq_to_pkt_type(cqe);
- pkt->ol_flags = rxq_cq_to_ol_flags(rxq, cqe);
- }
- if (cqe->l4_hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) {
- pkt->ol_flags |= PKT_RX_VLAN_PKT |
- PKT_RX_VLAN_STRIPPED;
- pkt->vlan_tci = ntohs(cqe->vlan_info);
- }
- if (rxq->crc_present)
- len -= ETHER_CRC_LEN;
+ wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t));
+ if (len > DATA_LEN(seg)) {
+ len -= DATA_LEN(seg);
+ ++NB_SEGS(pkt);
+ ++rq_ci;
+ continue;
}
- PKT_LEN(pkt) = len;
- DATA_LEN(pkt) = len;
+ DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment bytes counter. */
- rxq->stats.ibytes += len;
+ rxq->stats.ibytes += PKT_LEN(pkt);
#endif
/* Return packet. */
*(pkts++) = pkt;
- ++pkts_ret;
+ pkt = NULL;
+ --pkts_n;
+ ++i;
skip:
+ /* Align consumer index to the next stride. */
+ rq_ci >>= sges_n;
++rq_ci;
+ rq_ci <<= sges_n;
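+ /* E.g. with sges_n = 2, a stride consumed up to rq_ci = 5 resumes
+ * at the next stride boundary, rq_ci = 8. */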
}
- if (unlikely((i == 0) && (rq_ci == rxq->rq_ci)))
+ if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
return 0;
- /* Repost WRs. */
-#ifdef DEBUG_RECV
- DEBUG("%p: reposting %u WRs", (void *)rxq, i);
-#endif
/* Update the consumer index. */
- rxq->rq_ci = rq_ci;
+ rxq->rq_ci = rq_ci >> sges_n;
rte_wmb();
*rxq->cq_db = htonl(rxq->cq_ci);
rte_wmb();
*rxq->rq_db = htonl(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment packets counter. */
- rxq->stats.ipackets += pkts_ret;
+ rxq->stats.ipackets += i;
#endif
- return pkts_ret;
+ return i;
}
/**
unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
+ unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
uint16_t rq_ci;
uint16_t cq_ci;
uint16_t elts_n;