- if (buf == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: empty mbuf pool", (void *)rxq);
- goto error;
- }
- elt->buf = buf;
- wr->next = &(*elts)[(i + 1)].wr;
- wr->sg_list = sge;
- wr->num_sge = 1;
- /* Headroom is reserved by rte_pktmbuf_alloc(). */
- assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
- /* Buffer is supposed to be empty. */
- assert(rte_pktmbuf_data_len(buf) == 0);
- assert(rte_pktmbuf_pkt_len(buf) == 0);
- /* sge->addr must be able to store a pointer. */
- assert(sizeof(sge->addr) >= sizeof(uintptr_t));
- /* SGE keeps its headroom. */
- sge->addr = (uintptr_t)
- ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
- sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
- sge->lkey = rxq->mr->lkey;
- /* Redundant check for tailroom. */
- assert(sge->length == rte_pktmbuf_tailroom(buf));
- }
- /* The last WR pointer must be NULL. */
- (*elts)[(i - 1)].wr.next = NULL;
- DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq, elts_n);
- rxq->elts_n = elts_n;
- rxq->elts_head = 0;
- rxq->elts = elts;
- return 0;
-error:
- if (elts != NULL) {
- for (i = 0; (i != RTE_DIM(*elts)); ++i)
- rte_pktmbuf_free_seg((*elts)[i].buf);
- rte_free(elts);
- }
- DEBUG("%p: failed, freed everything", (void *)rxq);
- assert(rte_errno > 0);
- return -rte_errno;
-}
-
-/**
- * Free RX queue elements.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_free_elts(struct rxq *rxq)
-{
- unsigned int i;
- unsigned int elts_n = rxq->elts_n;
- struct rxq_elt (*elts)[elts_n] = rxq->elts;
-
- DEBUG("%p: freeing WRs", (void *)rxq);
- rxq->elts_n = 0;
- rxq->elts = NULL;
- if (elts == NULL)
- return;
- for (i = 0; (i != RTE_DIM(*elts)); ++i)
- rte_pktmbuf_free_seg((*elts)[i].buf);
- rte_free(elts);
-}
-
-/**
- * Unregister a MAC address.
- *
- * @param priv
- * Pointer to private structure.
- */
-static void
-priv_mac_addr_del(struct priv *priv)
-{
-#ifndef NDEBUG
- uint8_t (*mac)[ETHER_ADDR_LEN] = &priv->mac.addr_bytes;
-#endif
-
- if (!priv->mac_flow)
- return;
- DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x",
- (void *)priv,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5]);
- claim_zero(ibv_destroy_flow(priv->mac_flow));
- priv->mac_flow = NULL;
-}
-
-/**
- * Register a MAC address.
- *
- * The MAC address is registered in queue 0.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-priv_mac_addr_add(struct priv *priv)
-{
- uint8_t (*mac)[ETHER_ADDR_LEN] = &priv->mac.addr_bytes;
- struct rxq *rxq;
- struct ibv_flow *flow;
-
- /* If device isn't started, this is all we need to do. */
- if (!priv->started)
- return 0;
- if (priv->isolated)
- return 0;
- if (*priv->rxqs && (*priv->rxqs)[0])
- rxq = (*priv->rxqs)[0];
- else
- return 0;
-
- /* Allocate flow specification on the stack. */
- struct __attribute__((packed)) {
- struct ibv_flow_attr attr;
- struct ibv_flow_spec_eth spec;
- } data;
- struct ibv_flow_attr *attr = &data.attr;
- struct ibv_flow_spec_eth *spec = &data.spec;
-
- if (priv->mac_flow)
- priv_mac_addr_del(priv);
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
- *attr = (struct ibv_flow_attr){
- .type = IBV_FLOW_ATTR_NORMAL,
- .priority = 3,
- .num_of_specs = 1,
- .port = priv->port,
- .flags = 0
- };
- *spec = (struct ibv_flow_spec_eth){
- .type = IBV_FLOW_SPEC_ETH,
- .size = sizeof(*spec),
- .val = {
- .dst_mac = {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- },
- },
- .mask = {
- .dst_mac = "\xff\xff\xff\xff\xff\xff",
- }
- };
- DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x",
- (void *)priv,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5]);
- /* Create related flow. */
- flow = ibv_create_flow(rxq->qp, attr);
- if (flow == NULL) {
- rte_errno = errno ? errno : EINVAL;
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)rxq, rte_errno, strerror(errno));
- return -rte_errno;
- }
- assert(priv->mac_flow == NULL);
- priv->mac_flow = flow;
- return 0;
-}
-
-/**
- * Clean up a RX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_cleanup(struct rxq *rxq)
-{
- DEBUG("cleaning up %p", (void *)rxq);
- rxq_free_elts(rxq);
- if (rxq->qp != NULL)
- claim_zero(ibv_destroy_qp(rxq->qp));
- if (rxq->cq != NULL)
- claim_zero(ibv_destroy_cq(rxq->cq));
- if (rxq->channel != NULL)
- claim_zero(ibv_destroy_comp_channel(rxq->channel));
- if (rxq->mr != NULL)
- claim_zero(ibv_dereg_mr(rxq->mr));
- memset(rxq, 0, sizeof(*rxq));
-}
-
-/**
- * Allocate a Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- * Pointer to private structure.
- * @param cq
- * Completion queue to associate with QP.
- * @param desc
- * Number of descriptors in QP (hint only).
- *
- * @return
- * QP pointer or NULL in case of error and rte_errno is set.
- */
-static struct ibv_qp *
-rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
-{
- struct ibv_qp *qp;
- struct ibv_qp_init_attr attr = {
- /* CQ to be associated with the send queue. */
- .send_cq = cq,
- /* CQ to be associated with the receive queue. */
- .recv_cq = cq,
- .cap = {
- /* Max number of outstanding WRs. */
- .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
- /* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1,
- },
- .qp_type = IBV_QPT_RAW_PACKET,
- };
-
- qp = ibv_create_qp(priv->pd, &attr);
- if (!qp)
- rte_errno = errno ? errno : EINVAL;
- return qp;
-}
-
-/**
- * Configure a RX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param rxq
- * Pointer to RX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- * @param mp
- * Memory pool for buffer allocations.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
- struct rte_mempool *mp)
-{
- struct priv *priv = dev->data->dev_private;
- struct rxq tmpl = {
- .priv = priv,
- .mp = mp,
- .socket = socket
- };
- struct ibv_qp_attr mod;
- struct ibv_recv_wr *bad_wr;
- unsigned int mb_len;
- int ret;
-
- (void)conf; /* Thresholds configuration (ignored). */
- mb_len = rte_pktmbuf_data_room_size(mp);
- if (desc == 0) {
- rte_errno = EINVAL;
- ERROR("%p: invalid number of Rx descriptors", (void *)dev);
- goto error;
- }
- /* Enable scattered packets support for this queue if necessary. */
- assert(mb_len >= RTE_PKTMBUF_HEADROOM);
- if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
- (mb_len - RTE_PKTMBUF_HEADROOM)) {
- ;
- } else if (dev->data->dev_conf.rxmode.enable_scatter) {
- WARN("%p: scattered mode has been requested but is"
- " not supported, this may lead to packet loss",
- (void *)dev);
- } else {
- WARN("%p: the requested maximum Rx packet size (%u) is"
- " larger than a single mbuf (%u) and scattered"
- " mode has not been requested",
- (void *)dev,
- dev->data->dev_conf.rxmode.max_rx_pkt_len,
- mb_len - RTE_PKTMBUF_HEADROOM);
- }
- /* Use the entire RX mempool as the memory region. */
- tmpl.mr = mlx4_mp2mr(priv->pd, mp);
- if (tmpl.mr == NULL) {
- rte_errno = EINVAL;
- ERROR("%p: MR creation failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- if (dev->data->dev_conf.intr_conf.rxq) {
- tmpl.channel = ibv_create_comp_channel(priv->ctx);
- if (tmpl.channel == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: Rx interrupt completion channel creation"
- " failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- if (mlx4_fd_set_non_blocking(tmpl.channel->fd) < 0) {
- ERROR("%p: unable to make Rx interrupt completion"
- " channel non-blocking: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- }
- tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, tmpl.channel, 0);
- if (tmpl.cq == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc);
- if (tmpl.qp == NULL) {
- ERROR("%p: QP creation failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- mod = (struct ibv_qp_attr){
- /* Move the QP to this state. */
- .qp_state = IBV_QPS_INIT,
- /* Primary port number. */
- .port_num = priv->port
- };
- ret = ibv_modify_qp(tmpl.qp, &mod, IBV_QP_STATE | IBV_QP_PORT);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- ret = rxq_alloc_elts(&tmpl, desc);
- if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- ret = ibv_post_recv(tmpl.qp, &(*tmpl.elts)[0].wr, &bad_wr);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: ibv_post_recv() failed for WR %p: %s",
- (void *)dev,
- (void *)bad_wr,
- strerror(rte_errno));
- goto error;
- }
- mod = (struct ibv_qp_attr){
- .qp_state = IBV_QPS_RTR
- };
- ret = ibv_modify_qp(tmpl.qp, &mod, IBV_QP_STATE);