va2pa depends on the offset between the physical address and the virtual
address of the current mbuf. It may compute the wrong physical address for
the next mbuf when that mbuf is allocated in another hugepage segment.
In rte_mempool_populate_default(), the attempt to allocate a whole block of
contiguous memory can fail. In that case, memory is reserved in several
memzones that have different physical address and virtual address
offsets. rte_mempool_populate_default() is used by
rte_pktmbuf_pool_create().
Fixes:
8451269e6d7b ("kni: remove continuous memory restriction")
Cc: stable@dpdk.org
Signed-off-by: Yangchao Zhou <zhouyates@gmail.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
-----------------
On the DPDK RX side, the mbuf is allocated by the PMD in the RX thread context.
-----------------
On the DPDK RX side, the mbuf is allocated by the PMD in the RX thread context.
-This thread will enqueue the mbuf in the rx_q FIFO.
+This thread will enqueue the mbuf in the rx_q FIFO,
+and the next pointers in the mbuf chain are converted to physical addresses.
The KNI thread will poll all KNI active devices for the rx_q.
If an mbuf is dequeued, it will be converted to a sk_buff and sent to the net stack via netif_rx().
The KNI thread will poll all KNI active devices for the rx_q.
If an mbuf is dequeued, it will be converted to a sk_buff and sent to the net stack via netif_rx().
-The dequeued mbuf must be freed, so the same pointer is sent back in the free_q FIFO.
+The dequeued mbuf must be freed, so the same pointer is sent back in the free_q FIFO,
+and the next pointers, if any, must be converted back to virtual addresses before the mbuf is put in the free_q FIFO.
The RX thread, in the same main loop, polls this FIFO and frees the mbuf after dequeuing it.
The RX thread, in the same main loop, polls this FIFO and frees the mbuf after dequeuing it.
+The next pointers are converted so that chained mbufs located
+in different hugepage segments do not cause a kernel crash.
Use Case: Egress
----------------
Use Case: Egress
----------------
return phys_to_virt(m->buf_physaddr + m->data_off);
}
return phys_to_virt(m->buf_physaddr + m->data_off);
}
-/* virtual address to physical address */
-static void *
-va2pa(void *va, struct rte_kni_mbuf *m)
-{
- void *pa;
-
- pa = (void *)((unsigned long)va -
- ((unsigned long)m->buf_addr -
- (unsigned long)m->buf_physaddr));
- return pa;
-}
-
/*
* It can be called to process the request.
*/
/*
* It can be called to process the request.
*/
struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
{
uint32_t ret, i, num_dst, num_rx;
struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
{
uint32_t ret, i, num_dst, num_rx;
+ struct rte_kni_mbuf *kva, *prev_kva;
+ int nb_segs;
+ int kva_nb_segs;
+
do {
num_dst = kni_fifo_free_count(dst_va);
if (num_dst == 0)
do {
num_dst = kni_fifo_free_count(dst_va);
if (num_dst == 0)
for (i = 0; i < num_rx; i++) {
kva = pa2kva(kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
for (i = 0; i < num_rx; i++) {
kva = pa2kva(kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
+
+ kva_nb_segs = kva->nb_segs;
+ for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+ if (!kva->next)
+ break;
+
+ prev_kva = kva;
+ kva = pa2kva(kva->next);
+ /* Convert physical address to virtual address */
+ prev_kva->next = pa2va(prev_kva->next, kva);
+ }
}
ret = kni_fifo_put(dst_va, kni->va, num_rx);
}
ret = kni_fifo_put(dst_va, kni->va, num_rx);
uint32_t ret;
uint32_t len;
uint32_t i, num_rx, num_fq;
uint32_t ret;
uint32_t len;
uint32_t i, num_rx, num_fq;
- struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *kva, *prev_kva;
void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
- kva = pa2kva(va2pa(kva->next, kva));
+ prev_kva = kva;
+ kva = pa2kva(kva->next);
data_kva = kva2data_kva(kva);
data_kva = kva2data_kva(kva);
+ /* Convert physical address to virtual address */
+ prev_kva->next = pa2va(prev_kva->next, kva);
uint32_t ret;
uint32_t len;
uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
uint32_t ret;
uint32_t len;
uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
- struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *kva, *next_kva;
void *data_kva;
struct rte_kni_mbuf *alloc_kva;
void *alloc_data_kva;
void *data_kva;
struct rte_kni_mbuf *alloc_kva;
void *alloc_data_kva;
data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
+ while (kva->next) {
+ next_kva = pa2kva(kva->next);
+ /* Convert physical address to virtual address */
+ kva->next = pa2va(kva->next, next_kva);
+ kva = next_kva;
+ }
+
alloc_kva = pa2kva(kni->alloc_pa[i]);
alloc_data_kva = kva2data_kva(alloc_kva);
kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
alloc_kva = pa2kva(kni->alloc_pa[i]);
alloc_data_kva = kva2data_kva(alloc_kva);
kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
uint32_t ret;
uint32_t len;
uint32_t i, num_rq, num_fq, num;
uint32_t ret;
uint32_t len;
uint32_t i, num_rq, num_fq, num;
- struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *kva, *prev_kva;
void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
- kva = pa2kva(va2pa(kva->next, kva));
+ prev_kva = kva;
+ kva = pa2kva(kva->next);
data_kva = kva2data_kva(kva);
data_kva = kva2data_kva(kva);
+ /* Convert physical address to virtual address */
+ prev_kva->next = pa2va(prev_kva->next, kva);
/* fields on second cache line */
char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
void *pool;
/* fields on second cache line */
char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
void *pool;
+ void *next; /**< Physical address of next mbuf in kernel. */
(unsigned long)m->buf_iova));
}
(unsigned long)m->buf_iova));
}
+static void *
+va2pa_all(struct rte_mbuf *mbuf) /* rewrites every next link of the chain with va2pa(); returns va2pa() of the head */
+{
+ void *phy_mbuf = va2pa(mbuf); /* converted address of the head; this is the value returned */
+ struct rte_mbuf *next = mbuf->next;
+ while (next) {
+ mbuf->next = va2pa(next); /* the link now holds va2pa(next) instead of the original pointer */
+ mbuf = next; /* advance through the saved original pointer, not the rewritten link */
+ next = mbuf->next; /* read the following link before it is overwritten on the next pass */
+ }
+ return phy_mbuf;
+}
+
static void
obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
unsigned obj_idx __rte_unused)
static void
obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
unsigned obj_idx __rte_unused)
unsigned
rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned int num)
{
unsigned
rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned int num)
{
+ num = RTE_MIN(kni_fifo_free_count(kni->rx_q), num);
void *phy_mbufs[num];
unsigned int ret;
unsigned int i;
for (i = 0; i < num; i++)
void *phy_mbufs[num];
unsigned int ret;
unsigned int i;
for (i = 0; i < num; i++)
- phy_mbufs[i] = va2pa(mbufs[i]);
+ phy_mbufs[i] = va2pa_all(mbufs[i]);
ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
}
unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
}
+
+/**
+ * Get the number of free slots in the fifo.
+ *
+ * Loads the write and read indexes with __KNI_LOAD_ACQUIRE and returns
+ * (read - write - 1) masked with (len - 1).
+ * NOTE(review): the mask presumes fifo->len is a power of two; the "- 1"
+ * presumably keeps one slot unused to tell full from empty -- confirm
+ * against the fifo put/get implementation.
+ */
+static inline uint32_t
+kni_fifo_free_count(struct rte_kni_fifo *fifo)
+{
+ uint32_t fifo_write = __KNI_LOAD_ACQUIRE(&fifo->write);
+ uint32_t fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
+ return (fifo_read - fifo_write - 1) & (fifo->len - 1);
+}