diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 71ed3a9..78b6dd5 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1,34 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright 2017 6WIND S.A.
- *   Copyright 2017 Mellanox
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of 6WIND S.A. nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
  */
 
 /**
@@ -80,48 +52,58 @@ uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
         *  bit[4] - MLX4_CQE_STATUS_TCP
         *  bit[3] - MLX4_CQE_STATUS_IPV4OPT
         *  bit[2] - MLX4_CQE_STATUS_IPV6
-        *  bit[1] - MLX4_CQE_STATUS_IPV4F
+        *  bit[1] - MLX4_CQE_STATUS_IPF
         *  bit[0] - MLX4_CQE_STATUS_IPV4
         * giving a total of up to 256 entries.
         */
+       /* L2 */
        [0x00] = RTE_PTYPE_L2_ETHER,
-       [0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+       /* L3 */
+       [0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_NONFRAG,
        [0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_L4_FRAG,
        [0x03] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_L4_FRAG,
-       [0x04] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
-       [0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT,
+       [0x04] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_NONFRAG,
+       [0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_FRAG,
+       [0x08] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+                    RTE_PTYPE_L4_NONFRAG,
+       [0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+                    RTE_PTYPE_L4_NONFRAG,
        [0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
                     RTE_PTYPE_L4_FRAG,
+       [0x0b] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+                    RTE_PTYPE_L4_FRAG,
+       /* TCP */
        [0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_L4_TCP,
-       [0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_L4_TCP,
        [0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_L4_TCP,
+       [0x16] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_FRAG,
        [0x18] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
                     RTE_PTYPE_L4_TCP,
        [0x19] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
                     RTE_PTYPE_L4_TCP,
-       [0x1a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
-                    RTE_PTYPE_L4_TCP,
+       /* UDP */
        [0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_L4_UDP,
-       [0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_L4_UDP,
        [0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_L4_UDP,
+       [0x26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_FRAG,
        [0x28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
                     RTE_PTYPE_L4_UDP,
        [0x29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
                     RTE_PTYPE_L4_UDP,
-       [0x2a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
-                    RTE_PTYPE_L4_UDP,
        /* Tunneled - L3 IPV6 */
        [0x80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
        [0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
        [0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_FRAG,
@@ -129,65 +111,58 @@ uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_FRAG,
        [0x84] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
+       [0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_FRAG,
        [0x88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
        [0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
        [0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_FRAG,
+       [0x8b] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_FRAG,
        /* Tunneled - L3 IPV6, TCP */
        [0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP,
-       [0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_TCP,
-       [0x93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_TCP,
        [0x94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP,
+       [0x96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_FRAG,
        [0x98] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT |
-                    RTE_PTYPE_INNER_L4_TCP,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
        [0x99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT |
-                    RTE_PTYPE_INNER_L4_TCP,
-       [0x9a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_TCP,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
        /* Tunneled - L3 IPV6, UDP */
-       [0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_UDP,
-       [0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+       [0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
                     RTE_PTYPE_INNER_L4_UDP,
-       [0xa3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_UDP,
-       [0xa4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+       [0xa4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_UDP,
-       [0xa8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+       [0xa6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_FRAG,
+       [0xa8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT |
                     RTE_PTYPE_INNER_L4_UDP,
-       [0xa9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+       [0xa9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT |
                     RTE_PTYPE_INNER_L4_UDP,
-       [0xaa] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_UDP,
        /* Tunneled - L3 IPV4 */
        [0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
        [0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
        [0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_FRAG,
@@ -195,65 +170,54 @@ uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_FRAG,
        [0xc4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
+       [0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_FRAG,
        [0xc8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
        [0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_NONFRAG,
        [0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT |
                     RTE_PTYPE_INNER_L4_FRAG,
+       [0xcb] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_FRAG,
        /* Tunneled - L3 IPV4, TCP */
-       [0xd0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_TCP,
        [0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP,
-       [0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_TCP,
-       [0xd3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_TCP,
        [0xd4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP,
+       [0xd6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_FRAG,
        [0xd8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT |
                     RTE_PTYPE_INNER_L4_TCP,
        [0xd9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT |
                     RTE_PTYPE_INNER_L4_TCP,
-       [0xda] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_TCP,
        /* Tunneled - L3 IPV4, UDP */
-       [0xe0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_UDP,
        [0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_UDP,
-       [0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_UDP,
-       [0xe3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L4_FRAG |
-                    RTE_PTYPE_INNER_L4_UDP,
        [0xe4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_UDP,
+       [0xe6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_FRAG,
        [0xe8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
+                    RTE_PTYPE_INNER_L4_UDP,
        [0xe9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
-       [0xea] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                    RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT |
                     RTE_PTYPE_INNER_L4_UDP,
 };
 
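A worked example of the table semantics (a sketch; the index bit meanings are taken from the comment at the top of the table):

/*
 * Sketch: an IPv4/TCP completion sets index bit[0] (MLX4_CQE_STATUS_IPV4)
 * and bit[4] (MLX4_CQE_STATUS_TCP), giving idx = 0x11, so the lookup yields
 *   RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP.
 * The new 0x01 entry covers the same IPv4 headers when the packet is
 * neither a fragment nor TCP/UDP, now reported as RTE_PTYPE_L4_NONFRAG.
 */
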
@@ -290,7 +254,7 @@ mlx4_txq_stamp_freed_wqe(struct mlx4_sq *sq, volatile uint32_t *start,
                } while (start != (volatile uint32_t *)sq->eob);
                start = (volatile uint32_t *)sq->buf;
                /* Flip invalid stamping ownership. */
-               stamp ^= RTE_BE32(0x1 << MLX4_SQ_OWNER_BIT);
+               stamp ^= RTE_BE32(1u << MLX4_SQ_OWNER_BIT);
                sq->stamp = stamp;
                if (start == end)
                        return size;
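
The 0x1 -> 1u literal change avoids shifting a signed int into the sign bit; a minimal sketch of the hazard, assuming MLX4_SQ_OWNER_BIT is the top bit (31) of the stamp word:

/* Sketch only, assuming MLX4_SQ_OWNER_BIT == 31. */
uint32_t bad  = 0x1 << 31; /* 0x1 is a signed int: undefined behavior */
uint32_t good = 1u  << 31; /* unsigned shift: well defined */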
@@ -312,10 +276,14 @@ mlx4_txq_stamp_freed_wqe(struct mlx4_sq *sq, volatile uint32_t *start,
  *
  * @param txq
  *   Pointer to Tx queue structure.
+ * @param elts_m
+ *   Tx elements number mask.
+ * @param sq
+ *   Pointer to the SQ structure.
  */
 static void
-mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
-                                 struct mlx4_sq *sq)
+mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
+                 struct mlx4_sq *sq)
 {
        unsigned int elts_tail = txq->elts_tail;
        struct mlx4_cq *cq = &txq->mcq;
@@ -355,38 +323,17 @@ mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
        if (unlikely(!completed))
                return;
        /* First stamping address is the end of the last one. */
-       first_txbb = (&(*txq->elts)[elts_tail])->eocb;
+       first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb;
        elts_tail += completed;
-       if (elts_tail >= elts_n)
-               elts_tail -= elts_n;
        /* The new tail element holds the end address. */
        sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb,
-               (&(*txq->elts)[elts_tail])->eocb);
+               (&(*txq->elts)[elts_tail & elts_m])->eocb);
        /* Update CQ consumer index. */
        cq->cons_index = cons_index;
        *cq->set_ci_db = rte_cpu_to_be_32(cons_index & MLX4_CQ_DB_CI_MASK);
-       txq->elts_comp -= completed;
        txq->elts_tail = elts_tail;
 }
 
-/**
- * Get memory pool (MP) from mbuf. If mbuf is indirect, the pool from which
- * the cloned mbuf is allocated is returned instead.
- *
- * @param buf
- *   Pointer to mbuf.
- *
- * @return
- *   Memory pool where data is located for given mbuf.
- */
-static struct rte_mempool *
-mlx4_txq_mb2mp(struct rte_mbuf *buf)
-{
-       if (unlikely(RTE_MBUF_INDIRECT(buf)))
-               return rte_mbuf_from_indirect(buf)->pool;
-       return buf->pool;
-}
-
 /**
  * Write Tx data segment to the SQ.
  *
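
The completion path above switches to masked ring indexing; a minimal sketch of the idea, assuming elts_n stays a power of two (which using elts_n - 1 as a mask implies):

/*
 * elts_head/elts_tail become free-running counters; the ring slot is
 * recovered with a mask instead of the old
 * "if (elts_tail >= elts_n) elts_tail -= elts_n;" wraparound fixup.
 */
static inline unsigned int
ring_slot(unsigned int counter, unsigned int elts_m)
{
	return counter & elts_m; /* elts_m == elts_n - 1, elts_n power of two */
}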
@@ -404,7 +351,7 @@ mlx4_fill_tx_data_seg(volatile struct mlx4_wqe_data_seg *dseg,
                       uint32_t lkey, uintptr_t addr, rte_be32_t  byte_count)
 {
        dseg->addr = rte_cpu_to_be_64(addr);
-       dseg->lkey = rte_cpu_to_be_32(lkey);
+       dseg->lkey = lkey;
 #if RTE_CACHE_LINE_SIZE < 64
        /*
         * Need a barrier here before writing the byte_count
@@ -463,7 +410,7 @@ mlx4_tx_burst_segs(struct rte_mbuf *buf, struct txq *txq,
        goto txbb_tail_segs;
 txbb_head_seg:
        /* Memory region key (big endian) for this memory pool. */
-       lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+       lkey = mlx4_tx_mb2mr(txq, sbuf);
        if (unlikely(lkey == (uint32_t)-1)) {
                DEBUG("%p: unable to get MP <-> MR association",
                      (void *)txq);
@@ -475,7 +422,7 @@ txbb_head_seg:
                dseg = (volatile struct mlx4_wqe_data_seg *)
                        sq->buf;
        dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(sbuf, uintptr_t));
-       dseg->lkey = rte_cpu_to_be_32(lkey);
+       dseg->lkey = lkey;
        /*
         * This data segment starts at the beginning of a new
         * TXBB, so we need to postpone its byte_count writing
@@ -495,7 +442,7 @@ txbb_tail_segs:
        /* Jump to default if there are more than two segments remaining. */
        switch (nb_segs) {
        default:
-               lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+               lkey = mlx4_tx_mb2mr(txq, sbuf);
                if (unlikely(lkey == (uint32_t)-1)) {
                        DEBUG("%p: unable to get MP <-> MR association",
                              (void *)txq);
@@ -511,7 +458,7 @@ txbb_tail_segs:
                nb_segs--;
                /* fallthrough */
        case 2:
-               lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+               lkey = mlx4_tx_mb2mr(txq, sbuf);
                if (unlikely(lkey == (uint32_t)-1)) {
                        DEBUG("%p: unable to get MP <-> MR association",
                              (void *)txq);
@@ -527,7 +474,7 @@ txbb_tail_segs:
                nb_segs--;
                /* fallthrough */
        case 1:
-               lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+               lkey = mlx4_tx_mb2mr(txq, sbuf);
                if (unlikely(lkey == (uint32_t)-1)) {
                        DEBUG("%p: unable to get MP <-> MR association",
                              (void *)txq);
@@ -580,33 +527,30 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        struct txq *txq = (struct txq *)dpdk_txq;
        unsigned int elts_head = txq->elts_head;
        const unsigned int elts_n = txq->elts_n;
+       const unsigned int elts_m = elts_n - 1;
        unsigned int bytes_sent = 0;
        unsigned int i;
-       unsigned int max;
+       unsigned int max = elts_head - txq->elts_tail;
        struct mlx4_sq *sq = &txq->msq;
        volatile struct mlx4_wqe_ctrl_seg *ctrl;
        struct txq_elt *elt;
 
        assert(txq->elts_comp_cd != 0);
-       if (likely(txq->elts_comp != 0))
-               mlx4_txq_complete(txq, elts_n, sq);
-       max = (elts_n - (elts_head - txq->elts_tail));
-       if (max > elts_n)
-               max -= elts_n;
+       if (likely(max >= txq->elts_comp_cd_init))
+               mlx4_txq_complete(txq, elts_m, sq);
+       max = elts_n - max;
        assert(max >= 1);
        assert(max <= elts_n);
        /* Always leave one free entry in the ring. */
        --max;
        if (max > pkts_n)
                max = pkts_n;
-       elt = &(*txq->elts)[elts_head];
+       elt = &(*txq->elts)[elts_head & elts_m];
        /* First Tx burst element saves the next WQE control segment. */
        ctrl = elt->wqe;
        for (i = 0; (i != max); ++i) {
                struct rte_mbuf *buf = pkts[i];
-               unsigned int elts_head_next =
-                       (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
-               struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
+               struct txq_elt *elt_next = &(*txq->elts)[++elts_head & elts_m];
                uint32_t owner_opcode = sq->owner_opcode;
                volatile struct mlx4_wqe_data_seg *dseg =
                                (volatile struct mlx4_wqe_data_seg *)(ctrl + 1);
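
With free-running head/tail counters, ring occupancy is their unsigned difference, which stays correct across 32-bit wraparound; a sketch of the arithmetic used above (elts_comp_cd_init is assumed to be the completion-request threshold set at queue creation):

static inline unsigned int
txq_free_slots(unsigned int elts_head, unsigned int elts_tail,
	       unsigned int elts_n)
{
	unsigned int used = elts_head - elts_tail; /* wraps safely: unsigned */

	return elts_n - used - 1; /* always leave one free entry */
}

/*
 * Completions are polled only when at least elts_comp_cd_init descriptors
 * are outstanding ("max >= txq->elts_comp_cd_init" above), consistent with
 * dropping the separate txq->elts_comp counter elsewhere in this patch.
 */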
@@ -640,7 +584,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                elt->buf = NULL;
                                break;
                        }
-                       lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
+                       lkey = mlx4_tx_mb2mr(txq, buf);
                        if (unlikely(lkey == (uint32_t)-1)) {
                                /* MR does not exist. */
                                DEBUG("%p: unable to get MP <-> MR association",
@@ -668,7 +612,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        ctrl_next = (volatile struct mlx4_wqe_ctrl_seg *)
                                ((volatile uint8_t *)ctrl_next - sq->size);
                        /* Flip HW valid ownership. */
-                       sq->owner_opcode ^= 0x1 << MLX4_SQ_OWNER_BIT;
+                       sq->owner_opcode ^= 1u << MLX4_SQ_OWNER_BIT;
                }
                /*
                 * For raw Ethernet, the SOLICIT flag is used to indicate
@@ -725,7 +669,6 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
                elt->buf = buf;
                bytes_sent += buf->pkt_len;
-               elts_head = elts_head_next;
                ctrl = ctrl_next;
                elt = elt_next;
        }
@@ -741,8 +684,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        rte_wmb();
        /* Ring QP doorbell. */
        rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
-       txq->elts_head = elts_head;
-       txq->elts_comp += i;
+       txq->elts_head += i;
        return i;
 }
 
@@ -777,11 +719,13 @@ rxq_cq_to_pkt_type(volatile struct mlx4_cqe *cqe,
         *  bit[4] - MLX4_CQE_STATUS_TCP
         *  bit[3] - MLX4_CQE_STATUS_IPV4OPT
         *  bit[2] - MLX4_CQE_STATUS_IPV6
-        *  bit[1] - MLX4_CQE_STATUS_IPV4F
+        *  bit[1] - MLX4_CQE_STATUS_IPF
         *  bit[0] - MLX4_CQE_STATUS_IPV4
         * giving a total of up to 256 entries.
         */
        idx |= ((status & MLX4_CQE_STATUS_PTYPE_MASK) >> 22);
+       if (status & MLX4_CQE_STATUS_IPV6)
+               idx |= ((status & MLX4_CQE_STATUS_IPV6F) >> 11);
        return mlx4_ptype_table[idx];
 }
 
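The two added lines fold the IPv6 fragment status into the same index bit used for IPv4 fragments; a sketch of the bit arithmetic, assuming MLX4_CQE_STATUS_IPV6F sits at bit 12 of the status word (implied by the >> 11 shift):

/*
 * Sketch: the ">> 22" shift already places the IP-fragment status
 * (MLX4_CQE_STATUS_IPF, documented as index bit[1]) for IPv4 packets; IPv6
 * fragments are flagged in a separate status bit, so
 *   (status & MLX4_CQE_STATUS_IPV6F) >> 11  ==  (1u << 12) >> 11  ==  0x2
 * lands them on the same bit[1]. Entries such as 0x06, 0x16 and 0x26 in
 * mlx4_ptype_table consume exactly these indexes.
 */
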
@@ -965,11 +909,14 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                goto skip;
                        }
                        pkt = seg;
+                       assert(len >= (rxq->crc_present << 2));
                        /* Update packet information. */
                        pkt->packet_type =
                                rxq_cq_to_pkt_type(cqe, rxq->l2tun_offload);
                        pkt->ol_flags = PKT_RX_RSS_HASH;
                        pkt->hash.rss = cqe->immed_rss_invalid;
+                       if (rxq->crc_present)
+                               len -= ETHER_CRC_LEN;
                        pkt->pkt_len = len;
                        if (rxq->csum | rxq->csum_l2tun) {
                                uint32_t flags =
@@ -994,6 +941,9 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 * changes.
                 */
                scat->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
+               /* If there's only one MR, no need to replace LKey in WQE. */
+               if (unlikely(mlx4_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
+                       scat->lkey = mlx4_rx_mb2mr(rxq, rep);
                if (len > seg->data_len) {
                        len -= seg->data_len;
                        ++pkt->nb_segs;