/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2017 Intel Corporation.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>

#include "base/i40e_prototype.h"
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"

#include <x86intrin.h>
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
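
/*
 * Each i40e data descriptor is 16 bytes: the low quadword carries the
 * DMA address of the packet data, the high quadword packs the descriptor
 * type (DTYPE_DATA), the command flags and the buffer size. vtx1() below
 * builds one such descriptor with a single 128-bit store; vtx() batches
 * four of them with two 256-bit AVX2 stores.
 */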
static inline void
vtx1(volatile struct i40e_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	/* high qword: descriptor type, command flags and buffer size */
	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
			((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));

	/* low qword: DMA address of the packet data */
	__m128i descriptor = _mm_set_epi64x(high_qw,
			pkt->buf_physaddr + pkt->data_off);
	_mm_store_si128((__m128i *)txdp, descriptor);
}
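
/*
 * The aligned 32-byte stores in vtx() require txdp to sit on a 32-byte
 * boundary. Ring descriptors are 16 bytes, so at most one scalar
 * descriptor is needed up front to reach alignment before the vector
 * loop starts.
 */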
static inline void
vtx(volatile struct i40e_tx_desc *txdp,
		struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	const uint64_t hi_qw_tmpl = (I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT));

	/* if unaligned on a 32-byte boundary, do one to align */
	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
		vtx1(txdp, *pkt, flags);
		nb_pkts--, txdp++, pkt++;
	}

	/* do four packets at a time while possible, in bursts */
	for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
		uint64_t hi_qw3 = hi_qw_tmpl |
			((uint64_t)pkt[3]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
		uint64_t hi_qw2 = hi_qw_tmpl |
			((uint64_t)pkt[2]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
		uint64_t hi_qw1 = hi_qw_tmpl |
			((uint64_t)pkt[1]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);
		uint64_t hi_qw0 = hi_qw_tmpl |
			((uint64_t)pkt[0]->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT);

		/* each 256-bit store writes two 16-byte descriptors */
		__m256i desc2_3 = _mm256_set_epi64x(
				hi_qw3, pkt[3]->buf_physaddr + pkt[3]->data_off,
				hi_qw2, pkt[2]->buf_physaddr + pkt[2]->data_off);
		__m256i desc0_1 = _mm256_set_epi64x(
				hi_qw1, pkt[1]->buf_physaddr + pkt[1]->data_off,
				hi_qw0, pkt[0]->buf_physaddr + pkt[0]->data_off);
		_mm256_store_si256((void *)(txdp + 2), desc2_3);
		_mm256_store_si256((void *)txdp, desc0_1);
	}

	/* do any last ones */
	while (nb_pkts) {
		vtx1(txdp, *pkt, flags);
		txdp++, pkt++, nb_pkts--;
	}
}
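
/*
 * Send at most tx_rs_thresh packets in one pass over the ring. The
 * caller (i40e_xmit_pkts_vec_avx2 below) loops until the whole burst is
 * sent or the ring fills up. As in the other i40e vector TX paths, only
 * single-segment mbufs with simple offloads are expected here.
 */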
static inline uint16_t
i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
			       uint16_t nb_pkts)
{
	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
	volatile struct i40e_tx_desc *txdp;
	struct i40e_tx_entry *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = I40E_TD_CMD;
	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;

	/* crossing the tx_rs_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		i40e_tx_free_bufs(txq);

	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		vtx(txdp, tx_pkts, n - 1, flags);
		tx_pkts += (n - 1);
		txdp += (n - 1);

		/* the last descriptor before the wrap gets the RS bit */
		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
					 I40E_TXD_QW1_CMD_SHIFT);
		txq->tx_next_rs =
			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	/* update the hardware tail register to kick off transmission */
	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);

	return nb_pkts;
}
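
/*
 * Illustrative example of the RS (Report Status) bookkeeping above,
 * assuming a 1024-entry ring with tx_rs_thresh = 32: tx_next_rs starts
 * at 31, so the first burst that advances tx_id past descriptor 31 sets
 * the RS bit there and moves tx_next_rs to 63, and so on. The NIC then
 * reports completion once per 32 descriptors rather than once per
 * packet, which is what keeps i40e_tx_free_bufs() cheap.
 */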
uint16_t
i40e_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
			uint16_t nb_pkts)
{
	uint16_t nb_tx = 0;
	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;

	while (nb_pkts) {
		uint16_t ret, num;

		/* cut the burst into tx_rs_thresh-sized chunks */
		num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
		ret = i40e_xmit_fixed_burst_vec_avx2(tx_queue, &tx_pkts[nb_tx],
						     num);
		nb_tx += ret;
		nb_pkts -= ret;
		/* fewer sent than requested means the ring is full */
		if (ret < num)
			break;
	}

	return nb_tx;
}
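
/*
 * Usage sketch (illustrative, not part of this driver): once this AVX2
 * path has been selected as the queue's tx_pkt_burst handler, an
 * application transmits through the normal ethdev API, e.g.:
 *
 *	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 *
 * where port_id, queue_id, pkts and n are the caller's own values; any
 * unsent pkts[sent..n-1] remain owned by the caller.
 */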