mempool: fix mempool virt populate with small chunks
[dpdk.git] / drivers / net / dpaa / dpaa_rxtx.c
index 0413932..5dba1db 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2017 NXP
+ *   Copyright 2017,2019 NXP
  *
  */
 
 #include <rte_eal.h>
 #include <rte_alarm.h>
 #include <rte_ether.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
 #include <rte_malloc.h>
 #include <rte_ring.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
 #include <rte_udp.h>
 #include <rte_net.h>
+#include <rte_eventdev.h>
 
 #include "dpaa_ethdev.h"
 #include "dpaa_rxtx.h"
 #include <rte_dpaa_bus.h>
 #include <dpaa_mempool.h>
 
+#include <qman.h>
 #include <fsl_usd.h>
 #include <fsl_qman.h>
 #include <fsl_bman.h>
-#include <of.h>
+#include <dpaa_of.h>
 #include <netcfg.h>
 
 #define DPAA_MBUF_TO_CONTIG_FD(_mbuf, _fd, _bpid) \
@@ -57,7 +59,7 @@
        } while (0)
 
 #if (defined RTE_LIBRTE_DPAA_DEBUG_DRIVER)
-void dpaa_display_frame(const struct qm_fd *fd)
+static void dpaa_display_frame(const struct qm_fd *fd)
 {
        int ii;
        char *ptr;
@@ -88,11 +90,10 @@ static inline void dpaa_slow_parsing(struct rte_mbuf *m __rte_unused,
        /*TBD:XXX: to be implemented*/
 }
 
-static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
-                                       uint64_t fd_virt_addr)
+static inline void dpaa_eth_packet_info(struct rte_mbuf *m, void *fd_virt_addr)
 {
        struct annotations_t *annot = GET_ANNOTATIONS(fd_virt_addr);
-       uint64_t prs = *((uint64_t *)(&annot->parse)) & DPAA_PARSE_MASK;
+       uint64_t prs = *((uintptr_t *)(&annot->parse)) & DPAA_PARSE_MASK;
 
        DPAA_DP_LOG(DEBUG, " Parsing mbuf: %p with annotations: %p", m, annot);
 
@@ -197,44 +198,45 @@ static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
 
 static inline void dpaa_checksum(struct rte_mbuf *mbuf)
 {
-       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+       struct rte_ether_hdr *eth_hdr =
+               rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
        char *l3_hdr = (char *)eth_hdr + mbuf->l2_len;
-       struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
-       struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+       struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)l3_hdr;
+       struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)l3_hdr;
 
        DPAA_DP_LOG(DEBUG, "Calculating checksum for mbuf: %p", mbuf);
 
        if (((mbuf->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) ||
            ((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
            RTE_PTYPE_L3_IPV4_EXT)) {
-               ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+               ipv4_hdr = (struct rte_ipv4_hdr *)l3_hdr;
                ipv4_hdr->hdr_checksum = 0;
                ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
        } else if (((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
                   RTE_PTYPE_L3_IPV6) ||
                   ((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
                   RTE_PTYPE_L3_IPV6_EXT))
-               ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+               ipv6_hdr = (struct rte_ipv6_hdr *)l3_hdr;
 
        if ((mbuf->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
-               struct tcp_hdr *tcp_hdr = (struct tcp_hdr *)(l3_hdr +
+               struct rte_tcp_hdr *tcp_hdr = (struct rte_tcp_hdr *)(l3_hdr +
                                          mbuf->l3_len);
                tcp_hdr->cksum = 0;
-               if (eth_hdr->ether_type == htons(ETHER_TYPE_IPv4))
+               if (eth_hdr->ether_type == htons(RTE_ETHER_TYPE_IPV4))
                        tcp_hdr->cksum = rte_ipv4_udptcp_cksum(ipv4_hdr,
                                                               tcp_hdr);
-               else /* assume ethertype == ETHER_TYPE_IPv6 */
+               else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
                        tcp_hdr->cksum = rte_ipv6_udptcp_cksum(ipv6_hdr,
                                                               tcp_hdr);
        } else if ((mbuf->packet_type & RTE_PTYPE_L4_MASK) ==
                   RTE_PTYPE_L4_UDP) {
-               struct udp_hdr *udp_hdr = (struct udp_hdr *)(l3_hdr +
+               struct rte_udp_hdr *udp_hdr = (struct rte_udp_hdr *)(l3_hdr +
                                                             mbuf->l3_len);
                udp_hdr->dgram_cksum = 0;
-               if (eth_hdr->ether_type == htons(ETHER_TYPE_IPv4))
+               if (eth_hdr->ether_type == htons(RTE_ETHER_TYPE_IPV4))
                        udp_hdr->dgram_cksum = rte_ipv4_udptcp_cksum(ipv4_hdr,
                                                                     udp_hdr);
-               else /* assume ethertype == ETHER_TYPE_IPv6 */
+               else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
                        udp_hdr->dgram_cksum = rte_ipv6_udptcp_cksum(ipv6_hdr,
                                                                     udp_hdr);
        }
@@ -305,9 +307,7 @@ dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        int i = 0;
        uint8_t fd_offset = fd->offset;
 
-       DPAA_DP_LOG(DEBUG, "Received an SG frame");
-
-       vaddr = rte_dpaa_mem_ptov(qm_fd_addr(fd));
+       vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
        if (!vaddr) {
                DPAA_PMD_ERR("unable to convert physical address");
                return NULL;
@@ -316,7 +316,7 @@ dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        sg_temp = &sgt[i++];
        hw_sg_to_cpu(sg_temp);
        temp = (struct rte_mbuf *)((char *)vaddr - bp_info->meta_data_size);
-       sg_vaddr = rte_dpaa_mem_ptov(qm_sg_entry_get64(sg_temp));
+       sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_sg_entry_get64(sg_temp));
 
        first_seg = (struct rte_mbuf *)((char *)sg_vaddr -
                                                bp_info->meta_data_size);
@@ -332,7 +332,8 @@ dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        while (i < DPAA_SGT_MAX_ENTRIES) {
                sg_temp = &sgt[i++];
                hw_sg_to_cpu(sg_temp);
-               sg_vaddr = rte_dpaa_mem_ptov(qm_sg_entry_get64(sg_temp));
+               sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+                                            qm_sg_entry_get64(sg_temp));
                cur_seg = (struct rte_mbuf *)((char *)sg_vaddr -
                                                      bp_info->meta_data_size);
                cur_seg->data_off = sg_temp->offset;
@@ -347,8 +348,10 @@ dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
                }
                prev_seg = cur_seg;
        }
+       DPAA_DP_LOG(DEBUG, "Received an SG frame len =%d, num_sg =%d",
+                       first_seg->pkt_len, first_seg->nb_segs);
 
-       dpaa_eth_packet_info(first_seg, (uint64_t)vaddr);
+       dpaa_eth_packet_info(first_seg, vaddr);
        rte_pktmbuf_free_seg(temp);
 
        return first_seg;
@@ -359,26 +362,27 @@ dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
 {
        struct rte_mbuf *mbuf;
        struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
-       void *ptr = rte_dpaa_mem_ptov(qm_fd_addr(fd));
+       void *ptr;
        uint8_t format =
                (fd->opaque & DPAA_FD_FORMAT_MASK) >> DPAA_FD_FORMAT_SHIFT;
        uint16_t offset;
        uint32_t length;
 
-       DPAA_DP_LOG(DEBUG, " FD--->MBUF");
-
        if (unlikely(format == qm_fd_sg))
                return dpaa_eth_sg_to_mbuf(fd, ifid);
 
-       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
-
        offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >> DPAA_FD_OFFSET_SHIFT;
        length = fd->opaque & DPAA_FD_LENGTH_MASK;
 
+       DPAA_DP_LOG(DEBUG, " FD--->MBUF off %d len = %d", offset, length);
+
        /* Ignoring case when format != qm_fd_contig */
        dpaa_display_frame(fd);
+       ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
 
        mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+       /* Prefetch the Parse results and packet data to L1 */
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
 
        mbuf->data_off = offset;
        mbuf->data_len = length;
@@ -389,22 +393,121 @@ dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        mbuf->ol_flags = 0;
        mbuf->next = NULL;
        rte_mbuf_refcnt_set(mbuf, 1);
-       dpaa_eth_packet_info(mbuf, (uint64_t)mbuf->buf_addr);
+       dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
 
        return mbuf;
 }
 
-enum qman_cb_dqrr_result dpaa_rx_cb(void *event __always_unused,
-                                   struct qman_portal *qm __always_unused,
-                                   struct qman_fq *fq,
-                                   const struct qm_dqrr_entry *dqrr,
-                                   void **bufs)
+/* Rx callback specific for LS1043: prefetches frame i+1 while filling i. */
+void
+dpaa_rx_cb_no_prefetch(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
+          void **bufs, int num_bufs)
 {
-       const struct qm_fd *fd = &dqrr->fd;
+       struct rte_mbuf *mbuf;
+       struct dpaa_bp_info *bp_info;
+       const struct qm_fd *fd;
+       void *ptr;
+       struct dpaa_if *dpaa_intf;
+       uint16_t offset, i;
+       uint32_t length;
+       uint8_t format;
 
-       *bufs = dpaa_eth_fd_to_mbuf(fd,
-                       ((struct dpaa_if *)fq->dpaa_intf)->ifid);
-       return qman_cb_dqrr_consume;
+       bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[0]->fd.bpid);
+       ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[0]->fd));
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+       bufs[0] = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+       /* Per frame: start the lookup/prefetch of i + 1, then fill frame i. */
+       for (i = 0; i < num_bufs; i++) {
+               if (i < num_bufs - 1) {
+                       bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[i + 1]->fd.bpid);
+                       ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[i + 1]->fd));
+                       rte_prefetch0((void *)((uint8_t *)ptr +
+                                       DEFAULT_RX_ICEOF));
+                       bufs[i + 1] = (struct rte_mbuf *)((char *)ptr -
+                                       bp_info->meta_data_size);
+               }
+
+               fd = &dqrr[i]->fd;
+               dpaa_intf = fq[0]->dpaa_intf;
+
+               format = (fd->opaque & DPAA_FD_FORMAT_MASK) >>
+                               DPAA_FD_FORMAT_SHIFT;
+               if (unlikely(format == qm_fd_sg)) {
+                       bufs[i] = dpaa_eth_sg_to_mbuf(fd, dpaa_intf->ifid);
+                       continue;
+               }
+               /* Contiguous frame: bufs[i] was resolved ahead of this pass. */
+               offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >>
+                               DPAA_FD_OFFSET_SHIFT;
+               length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+               mbuf = bufs[i];
+               mbuf->data_off = offset;
+               mbuf->data_len = length;
+               mbuf->pkt_len = length;
+               mbuf->port = dpaa_intf->ifid;
+
+               mbuf->nb_segs = 1;
+               mbuf->ol_flags = 0;
+               mbuf->next = NULL;
+               rte_mbuf_refcnt_set(mbuf, 1);
+               dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
+       }
+}
+
+void
+dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
+          void **bufs, int num_bufs)
+{
+       struct rte_mbuf *mbuf;
+       const struct qm_fd *fd;
+       struct dpaa_if *dpaa_intf;
+       uint16_t offset, i;
+       uint32_t length;
+       uint8_t format;
+       /* Contiguous path reads bufs[i]; presumably pre-set - TODO confirm. */
+       for (i = 0; i < num_bufs; i++) {
+               fd = &dqrr[i]->fd;
+               dpaa_intf = fq[0]->dpaa_intf;
+
+               format = (fd->opaque & DPAA_FD_FORMAT_MASK) >>
+                               DPAA_FD_FORMAT_SHIFT;
+               if (unlikely(format == qm_fd_sg)) {
+                       bufs[i] = dpaa_eth_sg_to_mbuf(fd, dpaa_intf->ifid);
+                       continue;
+               }
+               /* Contiguous frame: offset and length come from fd->opaque. */
+               offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >>
+                               DPAA_FD_OFFSET_SHIFT;
+               length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+               mbuf = bufs[i];
+               mbuf->data_off = offset;
+               mbuf->data_len = length;
+               mbuf->pkt_len = length;
+               mbuf->port = dpaa_intf->ifid;
+
+               mbuf->nb_segs = 1;
+               mbuf->ol_flags = 0;
+               mbuf->next = NULL;
+               rte_mbuf_refcnt_set(mbuf, 1);
+               dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
+       }
+}
+
+void dpaa_rx_cb_prepare(struct qm_dqrr_entry *dq, void **bufs)
+{
+       struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(dq->fd.bpid);
+       void *ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dq->fd));
+
+       /* In case of LS1046, annotation stashing is disabled due to L2 cache
+        * being a bottleneck in the multicore scenario for this platform.
+        * So we prefetch the annotation beforehand, so that it is available
+        * in cache when accessed.
+        */
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+       /* Hand back the mbuf that sits meta_data_size bytes before ptr. */
+       *bufs = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
 }
 
 static uint16_t
@@ -414,17 +517,79 @@ dpaa_eth_queue_portal_rx(struct qman_fq *fq,
 {
        int ret;
 
-       if (unlikely(fq->qp == NULL)) {
+       if (unlikely(!fq->qp_initialized)) {
                ret = rte_dpaa_portal_fq_init((void *)0, fq);
                if (ret) {
                        DPAA_PMD_ERR("Failure in affining portal %d", ret);
                        return 0;
                }
+               fq->qp_initialized = 1;
        }
 
        return qman_portal_poll_rx(nb_bufs, (void **)bufs, fq->qp);
 }
 
+enum qman_cb_dqrr_result
+dpaa_rx_cb_parallel(void *event,
+                   struct qman_portal *qm __always_unused,
+                   struct qman_fq *fq,
+                   const struct qm_dqrr_entry *dqrr,
+                   void **bufs)
+{
+       u32 ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
+       struct rte_mbuf *mbuf;
+       struct rte_event *ev = (struct rte_event *)event;
+       /* Build the mbuf and describe it in *ev from the fq->ev settings. */
+       mbuf = dpaa_eth_fd_to_mbuf(&dqrr->fd, ifid);
+       ev->event_ptr = (void *)mbuf;
+       ev->flow_id = fq->ev.flow_id;
+       ev->sub_event_type = fq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_ETHDEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = fq->ev.sched_type;
+       ev->queue_id = fq->ev.queue_id;
+       ev->priority = fq->ev.priority;
+       ev->impl_opaque = (uint8_t)DPAA_INVALID_MBUF_SEQN;
+       mbuf->seqn = DPAA_INVALID_MBUF_SEQN;
+       *bufs = mbuf;
+
+       return qman_cb_dqrr_consume;
+}
+
+enum qman_cb_dqrr_result
+dpaa_rx_cb_atomic(void *event,
+                 struct qman_portal *qm __always_unused,
+                 struct qman_fq *fq,
+                 const struct qm_dqrr_entry *dqrr,
+                 void **bufs)
+{
+       u8 index;
+       u32 ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
+       struct rte_mbuf *mbuf;
+       struct rte_event *ev = (struct rte_event *)event;
+       /* Build the mbuf and describe it in *ev from the fq->ev settings. */
+       mbuf = dpaa_eth_fd_to_mbuf(&dqrr->fd, ifid);
+       ev->event_ptr = (void *)mbuf;
+       ev->flow_id = fq->ev.flow_id;
+       ev->sub_event_type = fq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_ETHDEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = fq->ev.sched_type;
+       ev->queue_id = fq->ev.queue_id;
+       ev->priority = fq->ev.priority;
+
+       /* Save active dqrr entries; index + 1 is stored as the seqn tag */
+       index = DQRR_PTR2IDX(dqrr);
+       DPAA_PER_LCORE_DQRR_SIZE++;
+       DPAA_PER_LCORE_DQRR_HELD |= 1 << index;
+       DPAA_PER_LCORE_DQRR_MBUF(index) = mbuf;
+       ev->impl_opaque = index + 1;
+       mbuf->seqn = (uint32_t)index + 1;
+       *bufs = mbuf;
+
+       return qman_cb_dqrr_defer;
+}
+
 uint16_t dpaa_eth_queue_rx(void *q,
                           struct rte_mbuf **bufs,
                           uint16_t nb_bufs)
@@ -432,19 +597,37 @@ uint16_t dpaa_eth_queue_rx(void *q,
        struct qman_fq *fq = q;
        struct qm_dqrr_entry *dq;
        uint32_t num_rx = 0, ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
-       int ret;
+       int num_rx_bufs, ret;
+       uint32_t vdqcr_flags = 0;
+
+       if (unlikely(rte_dpaa_bpid_info == NULL &&
+                               rte_eal_process_type() == RTE_PROC_SECONDARY))
+               rte_dpaa_bpid_info = fq->bp_array;
 
        if (likely(fq->is_static))
                return dpaa_eth_queue_portal_rx(fq, bufs, nb_bufs);
 
-       ret = rte_dpaa_portal_init((void *)0);
-       if (ret) {
-               DPAA_PMD_ERR("Failure in affining portal");
-               return 0;
+       if (unlikely(!RTE_PER_LCORE(dpaa_io))) {
+               ret = rte_dpaa_portal_init((void *)0);
+               if (ret) {
+                       DPAA_PMD_ERR("Failure in affining portal");
+                       return 0;
+               }
        }
 
-       ret = qman_set_vdq(fq, (nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES) ?
-                               DPAA_MAX_DEQUEUE_NUM_FRAMES : nb_bufs);
+       /* Until request for four buffers, we provide exact number of buffers.
+        * Otherwise we do not set the QM_VDQCR_EXACT flag.
+        * Not setting QM_VDQCR_EXACT flag can provide two more buffers than
+        * requested, so we request two less in this case.
+        */
+       if (nb_bufs < 4) {
+               vdqcr_flags = QM_VDQCR_EXACT;
+               num_rx_bufs = nb_bufs;
+       } else {
+               num_rx_bufs = nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES ?
+                       (DPAA_MAX_DEQUEUE_NUM_FRAMES - 2) : (nb_bufs - 2);
+       }
+       ret = qman_set_vdq(fq, num_rx_bufs, vdqcr_flags);
        if (ret)
                return 0;
 
@@ -459,52 +642,6 @@ uint16_t dpaa_eth_queue_rx(void *q,
        return num_rx;
 }
 
-static void *dpaa_get_pktbuf(struct dpaa_bp_info *bp_info)
-{
-       int ret;
-       uint64_t buf = 0;
-       struct bm_buffer bufs;
-
-       ret = bman_acquire(bp_info->bp, &bufs, 1, 0);
-       if (ret <= 0) {
-               DPAA_PMD_WARN("Failed to allocate buffers %d", ret);
-               return (void *)buf;
-       }
-
-       DPAA_DP_LOG(DEBUG, "got buffer 0x%lx from pool %d",
-                   (uint64_t)bufs.addr, bufs.bpid);
-
-       buf = (uint64_t)rte_dpaa_mem_ptov(bufs.addr) - bp_info->meta_data_size;
-       if (!buf)
-               goto out;
-
-out:
-       return (void *)buf;
-}
-
-static struct rte_mbuf *dpaa_get_dmable_mbuf(struct rte_mbuf *mbuf,
-                                            struct dpaa_if *dpaa_intf)
-{
-       struct rte_mbuf *dpaa_mbuf;
-
-       /* allocate pktbuffer on bpid for dpaa port */
-       dpaa_mbuf = dpaa_get_pktbuf(dpaa_intf->bp_info);
-       if (!dpaa_mbuf)
-               return NULL;
-
-       memcpy((uint8_t *)(dpaa_mbuf->buf_addr) + RTE_PKTMBUF_HEADROOM, (void *)
-               ((uint8_t *)(mbuf->buf_addr) + mbuf->data_off), mbuf->pkt_len);
-
-       /* Copy only the required fields */
-       dpaa_mbuf->data_off = RTE_PKTMBUF_HEADROOM;
-       dpaa_mbuf->pkt_len = mbuf->pkt_len;
-       dpaa_mbuf->ol_flags = mbuf->ol_flags;
-       dpaa_mbuf->packet_type = mbuf->packet_type;
-       dpaa_mbuf->tx_offload = mbuf->tx_offload;
-       rte_pktmbuf_free(mbuf);
-       return dpaa_mbuf;
-}
-
 int
 dpaa_eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
                struct qm_fd *fd,
@@ -678,24 +815,84 @@ tx_on_dpaa_pool(struct rte_mbuf *mbuf,
 }
 
 /* Handle all mbufs on an external pool (non-dpaa) */
-static inline uint16_t
-tx_on_external_pool(struct qman_fq *txq, struct rte_mbuf *mbuf,
-                   struct qm_fd *fd_arr)
+static inline struct rte_mbuf *
+reallocate_mbuf(struct qman_fq *txq, struct rte_mbuf *mbuf)
 {
        struct dpaa_if *dpaa_intf = txq->dpaa_intf;
-       struct rte_mbuf *dmable_mbuf;
+       struct dpaa_bp_info *bp_info = dpaa_intf->bp_info;
+       struct rte_mbuf *new_mbufs[DPAA_SGT_MAX_ENTRIES + 1] = {0};
+       struct rte_mbuf *temp_mbuf;
+       int num_new_segs, mbuf_greater, ret, extra_seg = 0, i = 0;
+       uint64_t mbufs_size, bytes_to_copy, offset1 = 0, offset2 = 0;
+       char *data;
+       /* Copy mbuf's data into mbufs from bp_info->mp; free mbuf on success */
+       DPAA_DP_LOG(DEBUG, "Reallocating transmit buffer");
+       /* Payload room per new mbuf: buffer minus metadata and headroom. */
+       mbufs_size = bp_info->size -
+               bp_info->meta_data_size - RTE_PKTMBUF_HEADROOM;
+       extra_seg = !!(mbuf->pkt_len % mbufs_size);
+       num_new_segs = (mbuf->pkt_len / mbufs_size) + extra_seg;
+
+       ret = rte_pktmbuf_alloc_bulk(bp_info->mp, new_mbufs, num_new_segs);
+       if (ret != 0) {
+               DPAA_DP_LOG(DEBUG, "Allocation for new buffers failed");
+               return NULL;
+       }
 
-       DPAA_DP_LOG(DEBUG, "Non-BMAN offloaded buffer."
-                   "Allocating an offloaded buffer");
-       dmable_mbuf = dpaa_get_dmable_mbuf(mbuf, dpaa_intf);
-       if (!dmable_mbuf) {
-               DPAA_DP_LOG(DEBUG, "no dpaa buffers.");
-               return 1;
+       temp_mbuf = mbuf;
+
+       while (temp_mbuf) {
+               /* If mbuf data is less than new mbuf remaining memory */
+               if ((temp_mbuf->data_len - offset1) < (mbufs_size - offset2)) {
+                       bytes_to_copy = temp_mbuf->data_len - offset1;
+                       mbuf_greater = -1;
+               /* If mbuf data is greater than new mbuf remaining memory */
+               } else if ((temp_mbuf->data_len - offset1) >
+                          (mbufs_size - offset2)) {
+                       bytes_to_copy = mbufs_size - offset2;
+                       mbuf_greater = 1;
+               /* if mbuf data is equal to new mbuf remaining memory */
+               } else {
+                       bytes_to_copy = temp_mbuf->data_len - offset1;
+                       mbuf_greater = 0;
+               }
+
+               /* Copy the data. NOTE(review): append result is unchecked */
+               data = rte_pktmbuf_append(new_mbufs[0], bytes_to_copy);
+               /* Read from the current source segment, not the chain head */
+               rte_memcpy((uint8_t *)data, rte_pktmbuf_mtod_offset(temp_mbuf,
+                          void *, offset1), bytes_to_copy);
+
+               /* Set new offsets and the temp buffers */
+               if (mbuf_greater == -1) {
+                       offset1 = 0;
+                       offset2 += bytes_to_copy;
+                       temp_mbuf = temp_mbuf->next;
+               } else if (mbuf_greater == 1) {
+                       offset2 = 0;
+                       offset1 += bytes_to_copy;
+                       new_mbufs[i]->next = new_mbufs[i + 1];
+                       new_mbufs[0]->nb_segs++;
+                       i++;
+               } else {
+                       offset1 = 0;
+                       offset2 = 0;
+                       temp_mbuf = temp_mbuf->next;
+                       new_mbufs[i]->next = new_mbufs[i + 1];
+                       if (new_mbufs[i + 1])
+                               new_mbufs[0]->nb_segs++;
+                       i++;
+               }
        }
 
-       DPAA_MBUF_TO_CONTIG_FD(dmable_mbuf, fd_arr, dpaa_intf->bp_info->bpid);
+       /* Copy other required fields */
+       new_mbufs[0]->ol_flags = mbuf->ol_flags;
+       new_mbufs[0]->packet_type = mbuf->packet_type;
+       new_mbufs[0]->tx_offload = mbuf->tx_offload;
 
-       return 0;
+       rte_pktmbuf_free(mbuf);
+
+       return new_mbufs[0];
 }
 
 uint16_t
@@ -707,12 +904,15 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        struct qm_fd fd_arr[DPAA_TX_BURST_SIZE];
        uint32_t frames_to_send, loop, sent = 0;
        uint16_t state;
-       int ret;
+       int ret, realloc_mbuf = 0;
+       uint32_t seqn, index, flags[DPAA_TX_BURST_SIZE] = {0};
 
-       ret = rte_dpaa_portal_init((void *)0);
-       if (ret) {
-               DPAA_PMD_ERR("Failure in affining portal");
-               return 0;
+       if (unlikely(!RTE_PER_LCORE(dpaa_io))) {
+               ret = rte_dpaa_portal_init((void *)0);
+               if (ret) {
+                       DPAA_PMD_ERR("Failure in affining portal");
+                       return 0;
+               }
        }
 
        DPAA_DP_LOG(DEBUG, "Transmitting %d buffers on queue: %p", nb_bufs, q);
@@ -722,12 +922,33 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                                DPAA_TX_BURST_SIZE : nb_bufs;
                for (loop = 0; loop < frames_to_send; loop++) {
                        mbuf = *(bufs++);
+                       /* If the data offset is not a multiple of 16, FMAN can
+                        * stall due to an erratum, so reallocate the buffer.
+                        * NOTE(review): the 0x7F mask tests 128B alignment.
+                        */
+                       if (dpaa_svr_family == SVR_LS1043A_FAMILY &&
+                                       (mbuf->data_off & 0x7F) != 0x0)
+                               realloc_mbuf = 1;
+                       seqn = mbuf->seqn;
+                       if (seqn != DPAA_INVALID_MBUF_SEQN) {
+                               index = seqn - 1;
+                               if (DPAA_PER_LCORE_DQRR_HELD & (1 << index)) {
+                                       flags[loop] =
+                                          ((index & QM_EQCR_DCA_IDXMASK) << 8);
+                                       flags[loop] |= QMAN_ENQUEUE_FLAG_DCA;
+                                       DPAA_PER_LCORE_DQRR_SIZE--;
+                                       DPAA_PER_LCORE_DQRR_HELD &=
+                                                               ~(1 << index);
+                               }
+                       }
+
                        if (likely(RTE_MBUF_DIRECT(mbuf))) {
                                mp = mbuf->pool;
                                bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
                                if (likely(mp->ops_index ==
                                                bp_info->dpaa_ops_index &&
                                        mbuf->nb_segs == 1 &&
+                                       realloc_mbuf == 0 &&
                                        rte_mbuf_refcnt_read(mbuf) == 1)) {
                                        DPAA_MBUF_TO_CONTIG_FD(mbuf,
                                                &fd_arr[loop], bp_info->bpid);
@@ -743,22 +964,12 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                        }
 
                        bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
-                       if (likely(mp->ops_index == bp_info->dpaa_ops_index)) {
-                               state = tx_on_dpaa_pool(mbuf, bp_info,
-                                                       &fd_arr[loop]);
-                               if (unlikely(state)) {
-                                       /* Set frames_to_send & nb_bufs so
-                                        * that packets are transmitted till
-                                        * previous frame.
-                                        */
-                                       frames_to_send = loop;
-                                       nb_bufs = loop;
-                                       goto send_pkts;
-                               }
-                       } else {
-                               state = tx_on_external_pool(q, mbuf,
-                                                           &fd_arr[loop]);
-                               if (unlikely(state)) {
+                       if (unlikely(mp->ops_index != bp_info->dpaa_ops_index ||
+                                    realloc_mbuf == 1)) {
+                               struct rte_mbuf *temp_mbuf;
+
+                               temp_mbuf = reallocate_mbuf(q, mbuf);
+                               if (!temp_mbuf) {
                                        /* Set frames_to_send & nb_bufs so
                                         * that packets are transmitted till
                                         * previous frame.
@@ -767,6 +978,20 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                                        nb_bufs = loop;
                                        goto send_pkts;
                                }
+                               mbuf = temp_mbuf;
+                               realloc_mbuf = 0;
+                       }
+
+                       state = tx_on_dpaa_pool(mbuf, bp_info,
+                                               &fd_arr[loop]);
+                       if (unlikely(state)) {
+                               /* Set frames_to_send & nb_bufs so
+                                * that packets are transmitted till
+                                * previous frame.
+                                */
+                               frames_to_send = loop;
+                               nb_bufs = loop;
+                               goto send_pkts;
                        }
                }
 
@@ -774,7 +999,8 @@ send_pkts:
                loop = 0;
                while (loop < frames_to_send) {
                        loop += qman_enqueue_multi(q, &fd_arr[loop],
-                                       frames_to_send - loop);
+                                                  &flags[loop],
+                                                  frames_to_send - loop);
                }
                nb_bufs -= frames_to_send;
                sent += frames_to_send;