Other kdrv = Y
ARMv8 = Y
Power8 = Y
+x86-32 = Y
x86-64 = Y
Usage doc = Y
Features
--------
-- Multi arch support: x86_64, POWER8, ARMv8.
+- Multi-arch support: x86_64, POWER8, ARMv8, i686.
- Multiple TX and RX queues.
- Support for scattered TX and RX frames.
- IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
- Minimum kernel version: v4.14 or the most recent 4.14-rc (see `Linux installation documentation`_)
- Minimum rdma-core version: v15+ commit 0c5f5765213a ("Merge pull request #227 from yishaih/tm")
  (see `RDMA Core installation documentation`_)
+- When building for i686, use:
+
+  - rdma-core version 18.0 or above, built with 32-bit support.
+  - Kernel version 4.14.41 or above.
.. _`Linux installation documentation`: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/plain/Documentation/admin-guide/README.rst
.. _`RDMA Core installation documentation`: https://raw.githubusercontent.com/linux-rdma/rdma-core/master/README.md
rte_memseg_walk(find_lower_va_bound, &addr);
/* keep distance to hugepages to minimize potential conflicts. */
- addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+ addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
/* anonymous mmap, no real memory consumption. */
addr = mmap(addr, MLX5_UAR_SIZE,
PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
priv->device_attr = attr;
priv->pd = pd;
priv->mtu = ETHER_MTU;
+#ifndef RTE_ARCH_64
+	/* Initialize UAR access locks for 32-bit implementations. */
+ rte_spinlock_init(&priv->uar_lock_cq);
+ for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
+ rte_spinlock_init(&priv->uar_lock[i]);
+#endif
/* Some internal functions rely on Netlink sockets, open them now. */
priv->nl_socket_rdma = mlx5_nl_init(0, NETLINK_RDMA);
priv->nl_socket_route = mlx5_nl_init(RTMGRP_LINK, NETLINK_ROUTE);
int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
uint32_t nl_sn; /* Netlink message sequence number. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t uar_lock_cq; /* CQs share a common, dedicated UAR. */
+ rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
+	/* UAR same-page access control required in 32-bit implementations. */
+#endif
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
#define MLX5_LINK_STATUS_TIMEOUT 10
/* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << 32)
+#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
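+/* Evaluates to (1ULL << 32) on 64-bit targets, (1ULL << 16) on 32-bit. */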
/* Offset of reserved UAR address space to hugepage memory. The offset is
 * used here to minimize the possibility that an address next to the
 * hugepages is used by other code in either the primary or a secondary
 * process; failing to map the Tx UAR would make Tx packets invisible to
 * the HW.
 */
-#define MLX5_UAR_OFFSET (1ULL << 32)
+#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Maximum number of UAR pages used by a port.
+ * These values give the size and mask of an array of mutexes used to
+ * synchronize access to a port's UARs on platforms that lack atomic 64-bit
+ * writes. On such systems a 64-bit doorbell is issued through two
+ * consecutive 32-bit writes, so access to a UAR page (which can be touched
+ * by all threads in the process) must be synchronized, for example with a
+ * mutex. No such synchronization is required when ringing doorbells on
+ * different UAR pages.
+ * A port with 512 Tx queues uses eight 4KB UAR pages, which are shared
+ * among the ports.
+ */
+#define MLX5_UAR_PAGE_NUM_MAX 64
+#define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) - 1)
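
As an illustration of the indexing scheme described above (and applied later
in mlx5_tx_uar_remap()), the standalone sketch below shows which mutex a
given UAR mmap offset selects. It is not part of the patch: uar_lock_index()
is a hypothetical helper, and the 4KB page size is an assumption carried
over from the comment.

#include <stdint.h>
#include <stdio.h>

#define UAR_PAGE_NUM_MAX  64                     /* mirrors MLX5_UAR_PAGE_NUM_MAX */
#define UAR_PAGE_NUM_MASK (UAR_PAGE_NUM_MAX - 1) /* mirrors MLX5_UAR_PAGE_NUM_MASK */

/* Hypothetical helper mirroring the driver's lock selection: offsets on the
 * same UAR page yield the same index; different pages yield different
 * indexes until the 64-entry array wraps around.
 */
static unsigned int
uar_lock_index(uint64_t uar_mmap_offset, uint64_t page_size)
{
	return (unsigned int)((uar_mmap_offset / page_size) & UAR_PAGE_NUM_MASK);
}

int
main(void)
{
	const uint64_t page_size = 4096; /* assumed UAR page size */

	/* Two queues on the same UAR page share one lock. */
	printf("%u %u\n", uar_lock_index(0x0000, page_size),
	       uar_lock_index(0x0800, page_size));        /* prints: 0 0 */
	/* A queue on the next UAR page takes a different lock. */
	printf("%u\n", uar_lock_index(0x1000, page_size)); /* prints: 1 */
	return 0;
}
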
/* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
#define MLX5_MPRQ_STRIDE_NUM_N 6U
doorbell = (uint64_t)doorbell_hi << 32;
doorbell |= rxq->cqn;
rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
- rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+ mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
+ cq_db_reg, rxq->uar_lock_cq);
}
/**
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.elts =
(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+#ifndef RTE_ARCH_64
+ tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
+#endif
tmpl->idx = idx;
rte_atomic32_inc(&tmpl->refcnt);
LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
unsigned int segs_n = 0;
const unsigned int max_inline = txq->max_inline;
+ uint64_t addr_64;
if (unlikely(!pkts_n))
return 0;
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
- addr = rte_cpu_to_be_64(addr);
+ addr_64 = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
++ds;
if (!segs_n)
total_length += length;
#endif
/* Store segment information. */
- addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+ addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
if (--segs_n)
unsigned int mpw_room = 0;
unsigned int inl_pad = 0;
uint32_t inl_hdr;
+ uint64_t addr_64;
struct mlx5_mpw mpw = {
.state = MLX5_MPW_STATE_CLOSED,
};
((uintptr_t)mpw.data.raw +
inl_pad);
(*txq->elts)[elts_head++ & elts_m] = buf;
- addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
- uintptr_t));
+ addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t));
*dseg = (rte_v128u32_t) {
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
mpw.data.raw = (volatile void *)(dseg + 1);
mpw.total_len += (inl_pad + sizeof(*dseg));
#include <rte_common.h>
#include <rte_hexdump.h>
#include <rte_atomic.h>
+#include <rte_spinlock.h>
+#include <rte_io.h>
#include "mlx5_utils.h"
#include "mlx5.h"
void *cq_uar; /* CQ user access region. */
uint32_t cqn; /* CQ number. */
uint8_t cq_arm_sn; /* CQ arm seq number. */
+#ifndef RTE_ARCH_64
+ rte_spinlock_t *uar_lock_cq;
+	/* CQ (UAR) access lock required for 32-bit implementations */
+#endif
uint32_t tunnel; /* Tunnel information. */
} __rte_cache_aligned;
volatile void *bf_reg; /* Blueflame register remapped. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
+#ifndef RTE_ARCH_64
+ rte_spinlock_t *uar_lock;
+	/* UAR access lock required for 32-bit implementations */
+#endif
} __rte_cache_aligned;
/* Verbs Rx queue elements. */
uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
+/**
+ * Provide a safe 64-bit store operation to the mlx5 UAR region on both
+ * 32-bit and 64-bit architectures.
+ *
+ * @param val
+ *   Value to write, in CPU endianness.
+ * @param addr
+ * Address to write to.
+ * @param lock
+ * Address of the lock to use for that UAR access.
+ */
+static __rte_always_inline void
+__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
+ rte_spinlock_t *lock __rte_unused)
+{
+#ifdef RTE_ARCH_64
+ rte_write64_relaxed(val, addr);
+#else /* !RTE_ARCH_64 */
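+	/* A 64-bit MMIO store is not atomic on 32-bit targets: serialize
+	 * writers to this UAR page and issue the doorbell as two 32-bit
+	 * halves, ordered by an I/O barrier.
+	 */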
+ rte_spinlock_lock(lock);
+ rte_write32_relaxed(val, addr);
+ rte_io_wmb();
+ rte_write32_relaxed(val >> 32,
+ (volatile void *)((volatile char *)addr + 4));
+ rte_spinlock_unlock(lock);
+#endif
+}
+
+/**
+ * Provide a safe 64-bit store operation to the mlx5 UAR region on both
+ * 32-bit and 64-bit architectures, while also guaranteeing that the store
+ * is ordered after any preceding writes from the calling code.
+ *
+ * @param val
+ *   Value to write, in CPU endianness.
+ * @param addr
+ * Address to write to.
+ * @param lock
+ * Address of the lock to use for that UAR access.
+ */
+static __rte_always_inline void
+__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock)
+{
+ rte_io_wmb();
+ __mlx5_uar_write64_relaxed(val, addr, lock);
+}
+
+/* Assist macros, used instead of directly calling the functions they wrap.
+ * On 64-bit builds the lock argument is discarded at preprocessing time, so
+ * callers may pass a lock field that only exists in 32-bit builds.
+ */
+#ifdef RTE_ARCH_64
+#define mlx5_uar_write64_relaxed(val, dst, lock) \
+ __mlx5_uar_write64_relaxed(val, dst, NULL)
+#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
+#else
+#define mlx5_uar_write64_relaxed(val, dst, lock) \
+ __mlx5_uar_write64_relaxed(val, dst, lock)
+#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
+#endif
+
#ifndef NDEBUG
/**
* Verify or set magic value in CQE.
*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
- *dst = *src;
+ mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
if (cond)
rte_wmb();
}
struct mlx5_txq_ctrl *txq_ctrl;
int already_mapped;
size_t page_size = sysconf(_SC_PAGESIZE);
+#ifndef RTE_ARCH_64
+ unsigned int lock_idx;
+#endif
memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
/*
}
/* new address in reserved UAR address space. */
addr = RTE_PTR_ADD(priv->uar_base,
- uar_va & (MLX5_UAR_SIZE - 1));
+ uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
if (!already_mapped) {
pages[pages_n++] = uar_va;
/* fixed mmap to specified address in reserved
else
assert(txq_ctrl->txq.bf_reg ==
RTE_PTR_ADD((void *)addr, off));
+#ifndef RTE_ARCH_64
+ /* Assign a UAR lock according to UAR page number */
+ lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+ MLX5_UAR_PAGE_NUM_MASK;
+ txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
}
return 0;
}
rte_atomic32_inc(&txq_ibv->refcnt);
if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
+		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%" PRIx64,
+			dev->data->port_id,
+			(uint64_t)txq_ctrl->uar_mmap_offset);
} else {
DRV_LOG(ERR,
"port %u failed to retrieve UAR info, invalid"