net: add rte prefix to ether structures
[dpdk.git] / drivers / net / dpaa / dpaa_rxtx.c
index 565ca50..7f174f7 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2017 NXP
+ *   Copyright 2017,2019 NXP
  *
  */
 
 #include <rte_eal.h>
 #include <rte_alarm.h>
 #include <rte_ether.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
 #include <rte_malloc.h>
 #include <rte_ring.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
 #include <rte_udp.h>
 #include <rte_net.h>
+#include <rte_eventdev.h>
 
 #include "dpaa_ethdev.h"
 #include "dpaa_rxtx.h"
 #include <rte_dpaa_bus.h>
 #include <dpaa_mempool.h>
 
+#include <qman.h>
 #include <fsl_usd.h>
 #include <fsl_qman.h>
 #include <fsl_bman.h>
@@ -57,7 +59,7 @@
        } while (0)
 
 #if (defined RTE_LIBRTE_DPAA_DEBUG_DRIVER)
-void dpaa_display_frame(const struct qm_fd *fd)
+static void dpaa_display_frame(const struct qm_fd *fd)
 {
        int ii;
        char *ptr;
@@ -88,21 +90,14 @@ static inline void dpaa_slow_parsing(struct rte_mbuf *m __rte_unused,
        /*TBD:XXX: to be implemented*/
 }
 
-static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
-                                       uint64_t fd_virt_addr)
+static inline void dpaa_eth_packet_info(struct rte_mbuf *m, void *fd_virt_addr)
 {
        struct annotations_t *annot = GET_ANNOTATIONS(fd_virt_addr);
-       uint64_t prs = *((uint64_t *)(&annot->parse)) & DPAA_PARSE_MASK;
+       uint64_t prs = *((uintptr_t *)(&annot->parse)) & DPAA_PARSE_MASK;
 
        DPAA_DP_LOG(DEBUG, " Parsing mbuf: %p with annotations: %p", m, annot);
 
        switch (prs) {
-       case DPAA_PKT_TYPE_NONE:
-               m->packet_type = 0;
-               break;
-       case DPAA_PKT_TYPE_ETHER:
-               m->packet_type = RTE_PTYPE_L2_ETHER;
-               break;
        case DPAA_PKT_TYPE_IPV4:
                m->packet_type = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4;
@@ -111,6 +106,9 @@ static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
                m->packet_type = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6;
                break;
+       case DPAA_PKT_TYPE_ETHER:
+               m->packet_type = RTE_PTYPE_L2_ETHER;
+               break;
        case DPAA_PKT_TYPE_IPV4_FRAG:
        case DPAA_PKT_TYPE_IPV4_FRAG_UDP:
        case DPAA_PKT_TYPE_IPV4_FRAG_TCP:
@@ -173,6 +171,9 @@ static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
                m->packet_type = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP;
                break;
+       case DPAA_PKT_TYPE_NONE:
+               m->packet_type = 0;
+               break;
        /* More switch cases can be added */
        default:
                dpaa_slow_parsing(m, prs);
@@ -183,12 +184,11 @@ static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
                                        << DPAA_PKT_L3_LEN_SHIFT;
 
        /* Set the hash values */
-       m->hash.rss = (uint32_t)(rte_be_to_cpu_64(annot->hash));
-       m->ol_flags = PKT_RX_RSS_HASH;
+       m->hash.rss = (uint32_t)(annot->hash);
        /* All packets with Bad checksum are dropped by interface (and
         * corresponding notification issued to RX error queues).
         */
-       m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+       m->ol_flags = PKT_RX_RSS_HASH | PKT_RX_IP_CKSUM_GOOD;
 
        /* Check if Vlan is present */
        if (prs & DPAA_PARSE_VLAN_MASK)
@@ -198,7 +198,8 @@ static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
 
 static inline void dpaa_checksum(struct rte_mbuf *mbuf)
 {
-       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+       struct rte_ether_hdr *eth_hdr =
+               rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
        char *l3_hdr = (char *)eth_hdr + mbuf->l2_len;
        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
@@ -272,8 +273,32 @@ static inline void dpaa_checksum_offload(struct rte_mbuf *mbuf,
        fd->cmd = DPAA_FD_CMD_RPD | DPAA_FD_CMD_DTC;
 }
 
+static inline void
+dpaa_unsegmented_checksum(struct rte_mbuf *mbuf, struct qm_fd *fd_arr)
+{
+       if (!mbuf->packet_type) {
+               struct rte_net_hdr_lens hdr_lens;
+
+               mbuf->packet_type = rte_net_get_ptype(mbuf, &hdr_lens,
+                               RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK
+                               | RTE_PTYPE_L4_MASK);
+               mbuf->l2_len = hdr_lens.l2_len;
+               mbuf->l3_len = hdr_lens.l3_len;
+       }
+       if (mbuf->data_off < (DEFAULT_TX_ICEOF +
+           sizeof(struct dpaa_eth_parse_results_t))) {
+               DPAA_DP_LOG(DEBUG, "Checksum offload Err: "
+                       "Not enough Headroom "
+                       "space for correct Checksum offload."
+                       "So Calculating checksum in Software.");
+               dpaa_checksum(mbuf);
+       } else {
+               dpaa_checksum_offload(mbuf, fd_arr, mbuf->buf_addr);
+       }
+}
+
 struct rte_mbuf *
-dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid)
+dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
 {
        struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
        struct rte_mbuf *first_seg, *prev_seg, *cur_seg, *temp;
@@ -282,9 +307,7 @@ dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid)
        int i = 0;
        uint8_t fd_offset = fd->offset;
 
-       DPAA_DP_LOG(DEBUG, "Received an SG frame");
-
-       vaddr = rte_dpaa_mem_ptov(qm_fd_addr(fd));
+       vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
        if (!vaddr) {
                DPAA_PMD_ERR("unable to convert physical address");
                return NULL;
@@ -293,7 +316,7 @@ dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid)
        sg_temp = &sgt[i++];
        hw_sg_to_cpu(sg_temp);
        temp = (struct rte_mbuf *)((char *)vaddr - bp_info->meta_data_size);
-       sg_vaddr = rte_dpaa_mem_ptov(qm_sg_entry_get64(sg_temp));
+       sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_sg_entry_get64(sg_temp));
 
        first_seg = (struct rte_mbuf *)((char *)sg_vaddr -
                                                bp_info->meta_data_size);
@@ -309,7 +332,8 @@ dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid)
        while (i < DPAA_SGT_MAX_ENTRIES) {
                sg_temp = &sgt[i++];
                hw_sg_to_cpu(sg_temp);
-               sg_vaddr = rte_dpaa_mem_ptov(qm_sg_entry_get64(sg_temp));
+               sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+                                            qm_sg_entry_get64(sg_temp));
                cur_seg = (struct rte_mbuf *)((char *)sg_vaddr -
                                                      bp_info->meta_data_size);
                cur_seg->data_off = sg_temp->offset;
@@ -324,41 +348,41 @@ dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid)
                }
                prev_seg = cur_seg;
        }
+       DPAA_DP_LOG(DEBUG, "Received an SG frame len =%d, num_sg =%d",
+                       first_seg->pkt_len, first_seg->nb_segs);
 
-       dpaa_eth_packet_info(first_seg, (uint64_t)vaddr);
+       dpaa_eth_packet_info(first_seg, vaddr);
        rte_pktmbuf_free_seg(temp);
 
        return first_seg;
 }
 
-static inline struct rte_mbuf *dpaa_eth_fd_to_mbuf(struct qm_fd *fd,
-                                                       uint32_t ifid)
+static inline struct rte_mbuf *
+dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
 {
-       struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
        struct rte_mbuf *mbuf;
+       struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
        void *ptr;
        uint8_t format =
                (fd->opaque & DPAA_FD_FORMAT_MASK) >> DPAA_FD_FORMAT_SHIFT;
-       uint16_t offset =
-               (fd->opaque & DPAA_FD_OFFSET_MASK) >> DPAA_FD_OFFSET_SHIFT;
-       uint32_t length = fd->opaque & DPAA_FD_LENGTH_MASK;
-
-       DPAA_DP_LOG(DEBUG, " FD--->MBUF");
+       uint16_t offset;
+       uint32_t length;
 
        if (unlikely(format == qm_fd_sg))
                return dpaa_eth_sg_to_mbuf(fd, ifid);
 
+       offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >> DPAA_FD_OFFSET_SHIFT;
+       length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+       DPAA_DP_LOG(DEBUG, " FD--->MBUF off %d len = %d", offset, length);
+
        /* Ignoring case when format != qm_fd_contig */
        dpaa_display_frame(fd);
-       ptr = rte_dpaa_mem_ptov(fd->addr);
-       /* Ignoring case when ptr would be NULL. That is only possible incase
-        * of a corrupted packet
-        */
+       ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
 
        mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
        /* Prefetch the Parse results and packet data to L1 */
        rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
-       rte_prefetch0((void *)((uint8_t *)ptr + offset));
 
        mbuf->data_off = offset;
        mbuf->data_len = length;
@@ -369,11 +393,202 @@ static inline struct rte_mbuf *dpaa_eth_fd_to_mbuf(struct qm_fd *fd,
        mbuf->ol_flags = 0;
        mbuf->next = NULL;
        rte_mbuf_refcnt_set(mbuf, 1);
-       dpaa_eth_packet_info(mbuf, (uint64_t)mbuf->buf_addr);
+       dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
 
        return mbuf;
 }
 
+/* Specific for LS1043 */
+void
+dpaa_rx_cb_no_prefetch(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
+          void **bufs, int num_bufs)
+{
+       struct rte_mbuf *mbuf;
+       struct dpaa_bp_info *bp_info;
+       const struct qm_fd *fd;
+       void *ptr;
+       struct dpaa_if *dpaa_intf;
+       uint16_t offset, i;
+       uint32_t length;
+       uint8_t format;
+
+       bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[0]->fd.bpid);
+       ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[0]->fd));
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+       bufs[0] = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+
+       for (i = 0; i < num_bufs; i++) {
+               if (i < num_bufs - 1) {
+                       bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[i + 1]->fd.bpid);
+                       ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[i + 1]->fd));
+                       rte_prefetch0((void *)((uint8_t *)ptr +
+                                       DEFAULT_RX_ICEOF));
+                       bufs[i + 1] = (struct rte_mbuf *)((char *)ptr -
+                                       bp_info->meta_data_size);
+               }
+
+               fd = &dqrr[i]->fd;
+               dpaa_intf = fq[0]->dpaa_intf;
+
+               format = (fd->opaque & DPAA_FD_FORMAT_MASK) >>
+                               DPAA_FD_FORMAT_SHIFT;
+               if (unlikely(format == qm_fd_sg)) {
+                       bufs[i] = dpaa_eth_sg_to_mbuf(fd, dpaa_intf->ifid);
+                       continue;
+               }
+
+               offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >>
+                               DPAA_FD_OFFSET_SHIFT;
+               length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+               mbuf = bufs[i];
+               mbuf->data_off = offset;
+               mbuf->data_len = length;
+               mbuf->pkt_len = length;
+               mbuf->port = dpaa_intf->ifid;
+
+               mbuf->nb_segs = 1;
+               mbuf->ol_flags = 0;
+               mbuf->next = NULL;
+               rte_mbuf_refcnt_set(mbuf, 1);
+               dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
+       }
+}
+
+void
+dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
+          void **bufs, int num_bufs)
+{
+       struct rte_mbuf *mbuf;
+       const struct qm_fd *fd;
+       struct dpaa_if *dpaa_intf;
+       uint16_t offset, i;
+       uint32_t length;
+       uint8_t format;
+
+       for (i = 0; i < num_bufs; i++) {
+               fd = &dqrr[i]->fd;
+               dpaa_intf = fq[0]->dpaa_intf;
+
+               format = (fd->opaque & DPAA_FD_FORMAT_MASK) >>
+                               DPAA_FD_FORMAT_SHIFT;
+               if (unlikely(format == qm_fd_sg)) {
+                       bufs[i] = dpaa_eth_sg_to_mbuf(fd, dpaa_intf->ifid);
+                       continue;
+               }
+
+               offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >>
+                               DPAA_FD_OFFSET_SHIFT;
+               length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+               mbuf = bufs[i];
+               mbuf->data_off = offset;
+               mbuf->data_len = length;
+               mbuf->pkt_len = length;
+               mbuf->port = dpaa_intf->ifid;
+
+               mbuf->nb_segs = 1;
+               mbuf->ol_flags = 0;
+               mbuf->next = NULL;
+               rte_mbuf_refcnt_set(mbuf, 1);
+               dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
+       }
+}
+
+void dpaa_rx_cb_prepare(struct qm_dqrr_entry *dq, void **bufs)
+{
+       struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(dq->fd.bpid);
+       void *ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dq->fd));
+
+       /* In case of LS1046, annotation stashing is disabled due to L2 cache
+        * being bottleneck in case of multicore scanario for this platform.
+        * So we prefetch the annoation beforehand, so that it is available
+        * in cache when accessed.
+        */
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+
+       *bufs = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+}
+
+static uint16_t
+dpaa_eth_queue_portal_rx(struct qman_fq *fq,
+                        struct rte_mbuf **bufs,
+                        uint16_t nb_bufs)
+{
+       int ret;
+
+       if (unlikely(fq->qp == NULL)) {
+               ret = rte_dpaa_portal_fq_init((void *)0, fq);
+               if (ret) {
+                       DPAA_PMD_ERR("Failure in affining portal %d", ret);
+                       return 0;
+               }
+       }
+
+       return qman_portal_poll_rx(nb_bufs, (void **)bufs, fq->qp);
+}
+
+enum qman_cb_dqrr_result
+dpaa_rx_cb_parallel(void *event,
+                   struct qman_portal *qm __always_unused,
+                   struct qman_fq *fq,
+                   const struct qm_dqrr_entry *dqrr,
+                   void **bufs)
+{
+       u32 ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
+       struct rte_mbuf *mbuf;
+       struct rte_event *ev = (struct rte_event *)event;
+
+       mbuf = dpaa_eth_fd_to_mbuf(&dqrr->fd, ifid);
+       ev->event_ptr = (void *)mbuf;
+       ev->flow_id = fq->ev.flow_id;
+       ev->sub_event_type = fq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_ETHDEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = fq->ev.sched_type;
+       ev->queue_id = fq->ev.queue_id;
+       ev->priority = fq->ev.priority;
+       ev->impl_opaque = (uint8_t)DPAA_INVALID_MBUF_SEQN;
+       mbuf->seqn = DPAA_INVALID_MBUF_SEQN;
+       *bufs = mbuf;
+
+       return qman_cb_dqrr_consume;
+}
+
+enum qman_cb_dqrr_result
+dpaa_rx_cb_atomic(void *event,
+                 struct qman_portal *qm __always_unused,
+                 struct qman_fq *fq,
+                 const struct qm_dqrr_entry *dqrr,
+                 void **bufs)
+{
+       u8 index;
+       u32 ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
+       struct rte_mbuf *mbuf;
+       struct rte_event *ev = (struct rte_event *)event;
+
+       mbuf = dpaa_eth_fd_to_mbuf(&dqrr->fd, ifid);
+       ev->event_ptr = (void *)mbuf;
+       ev->flow_id = fq->ev.flow_id;
+       ev->sub_event_type = fq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_ETHDEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = fq->ev.sched_type;
+       ev->queue_id = fq->ev.queue_id;
+       ev->priority = fq->ev.priority;
+
+       /* Save active dqrr entries */
+       index = DQRR_PTR2IDX(dqrr);
+       DPAA_PER_LCORE_DQRR_SIZE++;
+       DPAA_PER_LCORE_DQRR_HELD |= 1 << index;
+       DPAA_PER_LCORE_DQRR_MBUF(index) = mbuf;
+       ev->impl_opaque = index + 1;
+       mbuf->seqn = (uint32_t)index + 1;
+       *bufs = mbuf;
+
+       return qman_cb_dqrr_defer;
+}
+
 uint16_t dpaa_eth_queue_rx(void *q,
                           struct rte_mbuf **bufs,
                           uint16_t nb_bufs)
@@ -381,16 +596,37 @@ uint16_t dpaa_eth_queue_rx(void *q,
        struct qman_fq *fq = q;
        struct qm_dqrr_entry *dq;
        uint32_t num_rx = 0, ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
-       int ret;
+       int num_rx_bufs, ret;
+       uint32_t vdqcr_flags = 0;
 
-       ret = rte_dpaa_portal_init((void *)0);
-       if (ret) {
-               DPAA_PMD_ERR("Failure in affining portal");
-               return 0;
+       if (unlikely(rte_dpaa_bpid_info == NULL &&
+                               rte_eal_process_type() == RTE_PROC_SECONDARY))
+               rte_dpaa_bpid_info = fq->bp_array;
+
+       if (likely(fq->is_static))
+               return dpaa_eth_queue_portal_rx(fq, bufs, nb_bufs);
+
+       if (unlikely(!RTE_PER_LCORE(dpaa_io))) {
+               ret = rte_dpaa_portal_init((void *)0);
+               if (ret) {
+                       DPAA_PMD_ERR("Failure in affining portal");
+                       return 0;
+               }
        }
 
-       ret = qman_set_vdq(fq, (nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES) ?
-                               DPAA_MAX_DEQUEUE_NUM_FRAMES : nb_bufs);
+       /* Until request for four buffers, we provide exact number of buffers.
+        * Otherwise we do not set the QM_VDQCR_EXACT flag.
+        * Not setting QM_VDQCR_EXACT flag can provide two more buffers than
+        * requested, so we request two less in this case.
+        */
+       if (nb_bufs < 4) {
+               vdqcr_flags = QM_VDQCR_EXACT;
+               num_rx_bufs = nb_bufs;
+       } else {
+               num_rx_bufs = nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES ?
+                       (DPAA_MAX_DEQUEUE_NUM_FRAMES - 2) : (nb_bufs - 2);
+       }
+       ret = qman_set_vdq(fq, num_rx_bufs, vdqcr_flags);
        if (ret)
                return 0;
 
@@ -408,7 +644,7 @@ uint16_t dpaa_eth_queue_rx(void *q,
 static void *dpaa_get_pktbuf(struct dpaa_bp_info *bp_info)
 {
        int ret;
-       uint64_t buf = 0;
+       size_t buf = 0;
        struct bm_buffer bufs;
 
        ret = bman_acquire(bp_info->bp, &bufs, 1, 0);
@@ -417,10 +653,11 @@ static void *dpaa_get_pktbuf(struct dpaa_bp_info *bp_info)
                return (void *)buf;
        }
 
-       DPAA_DP_LOG(DEBUG, "got buffer 0x%lx from pool %d",
+       DPAA_DP_LOG(DEBUG, "got buffer 0x%" PRIx64 " from pool %d",
                    (uint64_t)bufs.addr, bufs.bpid);
 
-       buf = (uint64_t)rte_dpaa_mem_ptov(bufs.addr) - bp_info->meta_data_size;
+       buf = (size_t)DPAA_MEMPOOL_PTOV(bp_info, bufs.addr)
+                               - bp_info->meta_data_size;
        if (!buf)
                goto out;
 
@@ -438,11 +675,11 @@ static struct rte_mbuf *dpaa_get_dmable_mbuf(struct rte_mbuf *mbuf,
        if (!dpaa_mbuf)
                return NULL;
 
-       memcpy((uint8_t *)(dpaa_mbuf->buf_addr) + mbuf->data_off, (void *)
+       memcpy((uint8_t *)(dpaa_mbuf->buf_addr) + RTE_PKTMBUF_HEADROOM, (void *)
                ((uint8_t *)(mbuf->buf_addr) + mbuf->data_off), mbuf->pkt_len);
 
        /* Copy only the required fields */
-       dpaa_mbuf->data_off = mbuf->data_off;
+       dpaa_mbuf->data_off = RTE_PKTMBUF_HEADROOM;
        dpaa_mbuf->pkt_len = mbuf->pkt_len;
        dpaa_mbuf->ol_flags = mbuf->ol_flags;
        dpaa_mbuf->packet_type = mbuf->packet_type;
@@ -594,27 +831,8 @@ tx_on_dpaa_pool_unsegmented(struct rte_mbuf *mbuf,
                rte_pktmbuf_free(mbuf);
        }
 
-       if (mbuf->ol_flags & DPAA_TX_CKSUM_OFFLOAD_MASK) {
-               if (!mbuf->packet_type) {
-                       struct rte_net_hdr_lens hdr_lens;
-
-                       mbuf->packet_type = rte_net_get_ptype(mbuf, &hdr_lens,
-                                       RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK
-                                       | RTE_PTYPE_L4_MASK);
-                       mbuf->l2_len = hdr_lens.l2_len;
-                       mbuf->l3_len = hdr_lens.l3_len;
-               }
-               if (mbuf->data_off < (DEFAULT_TX_ICEOF +
-                   sizeof(struct dpaa_eth_parse_results_t))) {
-                       DPAA_DP_LOG(DEBUG, "Checksum offload Err: "
-                               "Not enough Headroom "
-                               "space for correct Checksum offload."
-                               "So Calculating checksum in Software.");
-                       dpaa_checksum(mbuf);
-               } else {
-                       dpaa_checksum_offload(mbuf, fd_arr, mbuf->buf_addr);
-               }
-       }
+       if (mbuf->ol_flags & DPAA_TX_CKSUM_OFFLOAD_MASK)
+               dpaa_unsegmented_checksum(mbuf, fd_arr);
 }
 
 /* Handle all mbufs on dpaa BMAN managed pool */
@@ -659,6 +877,8 @@ tx_on_external_pool(struct qman_fq *txq, struct rte_mbuf *mbuf,
        }
 
        DPAA_MBUF_TO_CONTIG_FD(dmable_mbuf, fd_arr, dpaa_intf->bp_info->bpid);
+       if (mbuf->ol_flags & DPAA_TX_CKSUM_OFFLOAD_MASK)
+               dpaa_unsegmented_checksum(mbuf, fd_arr);
 
        return 0;
 }
@@ -670,14 +890,17 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        struct rte_mempool *mp;
        struct dpaa_bp_info *bp_info;
        struct qm_fd fd_arr[DPAA_TX_BURST_SIZE];
-       uint32_t frames_to_send, loop, i = 0;
+       uint32_t frames_to_send, loop, sent = 0;
        uint16_t state;
        int ret;
+       uint32_t seqn, index, flags[DPAA_TX_BURST_SIZE] = {0};
 
-       ret = rte_dpaa_portal_init((void *)0);
-       if (ret) {
-               DPAA_PMD_ERR("Failure in affining portal");
-               return 0;
+       if (unlikely(!RTE_PER_LCORE(dpaa_io))) {
+               ret = rte_dpaa_portal_init((void *)0);
+               if (ret) {
+                       DPAA_PMD_ERR("Failure in affining portal");
+                       return 0;
+               }
        }
 
        DPAA_DP_LOG(DEBUG, "Transmitting %d buffers on queue: %p", nb_bufs, q);
@@ -685,10 +908,36 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        while (nb_bufs) {
                frames_to_send = (nb_bufs > DPAA_TX_BURST_SIZE) ?
                                DPAA_TX_BURST_SIZE : nb_bufs;
-               for (loop = 0; loop < frames_to_send; loop++, i++) {
-                       mbuf = bufs[i];
-                       if (RTE_MBUF_DIRECT(mbuf)) {
+               for (loop = 0; loop < frames_to_send; loop++) {
+                       mbuf = *(bufs++);
+                       seqn = mbuf->seqn;
+                       if (seqn != DPAA_INVALID_MBUF_SEQN) {
+                               index = seqn - 1;
+                               if (DPAA_PER_LCORE_DQRR_HELD & (1 << index)) {
+                                       flags[loop] =
+                                          ((index & QM_EQCR_DCA_IDXMASK) << 8);
+                                       flags[loop] |= QMAN_ENQUEUE_FLAG_DCA;
+                                       DPAA_PER_LCORE_DQRR_SIZE--;
+                                       DPAA_PER_LCORE_DQRR_HELD &=
+                                                               ~(1 << index);
+                               }
+                       }
+
+                       if (likely(RTE_MBUF_DIRECT(mbuf))) {
                                mp = mbuf->pool;
+                               bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
+                               if (likely(mp->ops_index ==
+                                               bp_info->dpaa_ops_index &&
+                                       mbuf->nb_segs == 1 &&
+                                       rte_mbuf_refcnt_read(mbuf) == 1)) {
+                                       DPAA_MBUF_TO_CONTIG_FD(mbuf,
+                                               &fd_arr[loop], bp_info->bpid);
+                                       if (mbuf->ol_flags &
+                                               DPAA_TX_CKSUM_OFFLOAD_MASK)
+                                               dpaa_unsegmented_checksum(mbuf,
+                                                       &fd_arr[loop]);
+                                       continue;
+                               }
                        } else {
                                mi = rte_mbuf_from_indirect(mbuf);
                                mp = mi->pool;
@@ -726,14 +975,16 @@ send_pkts:
                loop = 0;
                while (loop < frames_to_send) {
                        loop += qman_enqueue_multi(q, &fd_arr[loop],
-                                       frames_to_send - loop);
+                                                  &flags[loop],
+                                                  frames_to_send - loop);
                }
                nb_bufs -= frames_to_send;
+               sent += frames_to_send;
        }
 
-       DPAA_DP_LOG(DEBUG, "Transmitted %d buffers on queue: %p", i, q);
+       DPAA_DP_LOG(DEBUG, "Transmitted %d buffers on queue: %p", sent, q);
 
-       return i;
+       return sent;
 }
 
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,