4 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <rte_ethdev.h>
37 #include <rte_common.h>
39 #include "base/fm10k_type.h"
/*
 * rte_packet_prefetch(p): prefetch hint used on packet data.
 * With RTE_PMD_PACKET_PREFETCH defined it expands to rte_prefetch1(p);
 * the second definition below expands to an empty statement instead.
 * NOTE(review): the #else / #endif lines that must separate and close the
 * two definitions are not visible in this copy of the file - confirm.
 */
41 #ifdef RTE_PMD_PACKET_PREFETCH
42 #define rte_packet_prefetch(p) rte_prefetch1(p)
44 #define rte_packet_prefetch(p) do {} while (0)
/*
 * Debug-only helper, guarded by RTE_LIBRTE_FM10K_DEBUG_RX.
 *
 * Logs the contents of one RX descriptor at DEBUG level through PMD_RX_LOG,
 * laid out as a boxed table: GLORT next to packet header/type, VLAN+length
 * next to status, the RSS hash next to a literal 0 printed in the RESERVED
 * column, and finally the 64-bit timestamp.
 *
 * rxd: descriptor to print; it is only read.
 *
 * NOTE(review): the second value argument of the first two value rows sits
 * on a continuation line that is not visible in this copy - confirm which
 * descriptor fields are passed there.
 */
47 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
48 static inline void dump_rxd(union fm10k_rx_desc *rxd)
50 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
51 PMD_RX_LOG(DEBUG, "| GLORT | PKT HDR & TYPE |");
52 PMD_RX_LOG(DEBUG, "| 0x%08x | 0x%08x |", rxd->d.glort,
54 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
55 PMD_RX_LOG(DEBUG, "| VLAN & LEN | STATUS |");
56 PMD_RX_LOG(DEBUG, "| 0x%08x | 0x%08x |", rxd->d.vlan_len,
58 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
59 PMD_RX_LOG(DEBUG, "| RESERVED | RSS_HASH |");
60 PMD_RX_LOG(DEBUG, "| 0x%08x | 0x%08x |", 0, rxd->d.rss);
61 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
62 PMD_RX_LOG(DEBUG, "| TIME TAG |");
63 PMD_RX_LOG(DEBUG, "| 0x%016"PRIx64" |", rxd->q.timestamp);
64 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
/*
 * Translate the status/type fields of an RX descriptor into mbuf metadata.
 *
 * m: mbuf to annotate. packet_type is overwritten; ol_flags bits are only
 *    OR-ed in and never cleared here, so the caller must have initialised
 *    m->ol_flags beforehand.
 * d: RX descriptor to decode; it is only read.
 *
 * Flags that may be added: PKT_RX_RSS_HASH when any RSS type bit is set in
 * pkt_info, PKT_RX_IP_CKSUM_BAD / PKT_RX_L4_CKSUM_BAD when both bits of the
 * corresponding status pair (IPCS+IPE, L4CS+L4E) are set,
 * PKT_RX_HBUF_OVERFLOW on the HBO status bit and PKT_RX_RECIP_ERR on the
 * RXE status bit.
 */
69 rx_desc_to_ol_flags(struct rte_mbuf *m, const union fm10k_rx_desc *d)
/*
 * The lookup index below is (pkt_info & MASK) >> SHIFT, whose largest
 * possible value is MASK >> SHIFT itself, so the table needs one element
 * more than that value to keep every index in bounds. Type codes without
 * an explicit entry are zero-initialised.
 */
72 ptype_table[(FM10K_RXD_PKTTYPE_MASK >> FM10K_RXD_PKTTYPE_SHIFT) + 1]
73 __rte_cache_aligned = {
74 [FM10K_PKTTYPE_OTHER] = RTE_PTYPE_L2_ETHER,
75 [FM10K_PKTTYPE_IPV4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4,
76 [FM10K_PKTTYPE_IPV4_EX] = RTE_PTYPE_L2_ETHER |
77 RTE_PTYPE_L3_IPV4_EXT,
78 [FM10K_PKTTYPE_IPV6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6,
79 [FM10K_PKTTYPE_IPV6_EX] = RTE_PTYPE_L2_ETHER |
80 RTE_PTYPE_L3_IPV6_EXT,
81 [FM10K_PKTTYPE_IPV4 | FM10K_PKTTYPE_TCP] = RTE_PTYPE_L2_ETHER |
82 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
83 [FM10K_PKTTYPE_IPV6 | FM10K_PKTTYPE_TCP] = RTE_PTYPE_L2_ETHER |
84 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
85 [FM10K_PKTTYPE_IPV4 | FM10K_PKTTYPE_UDP] = RTE_PTYPE_L2_ETHER |
86 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
87 [FM10K_PKTTYPE_IPV6 | FM10K_PKTTYPE_UDP] = RTE_PTYPE_L2_ETHER |
88 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
/* Packet type comes straight from the table, keyed by the type bits. */
91 m->packet_type = ptype_table[(d->w.pkt_info & FM10K_RXD_PKTTYPE_MASK)
92 >> FM10K_RXD_PKTTYPE_SHIFT];
/* From here on flags are only added to m->ol_flags. */
94 if (d->w.pkt_info & FM10K_RXD_RSSTYPE_MASK)
95 m->ol_flags |= PKT_RX_RSS_HASH;
/* A checksum is reported bad only when both bits of its pair are set. */
97 if (unlikely((d->d.staterr &
98 (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)) ==
99 (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)))
100 m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
102 if (unlikely((d->d.staterr &
103 (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)) ==
104 (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)))
105 m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
107 if (unlikely(d->d.staterr & FM10K_RXD_STATUS_HBO))
108 m->ol_flags |= PKT_RX_HBUF_OVERFLOW;
110 if (unlikely(d->d.staterr & FM10K_RXD_STATUS_RXE))
111 m->ol_flags |= PKT_RX_RECIP_ERR;
/*
 * Burst receive for single-buffer packets.
 *
 * Starting at q->next_dd, examines at most RTE_MIN(nb_pkts, q->alloc_thresh)
 * descriptors. For each descriptor with the DD status bit set, the matching
 * mbuf from sw_ring gets its lengths, RSS hash, VLAN flag and vlan_tci
 * filled in and is stored into rx_pkts[]. Afterwards, when next_dd has
 * moved past q->next_trigger (or `alloc` is 1), replacement mbufs are taken
 * in bulk from q->mp, their descriptors are rewritten and the tail pointer
 * register is updated.
 *
 * rx_queue: RX queue, used as struct fm10k_rx_queue.
 * rx_pkts:  output array receiving the harvested mbufs.
 *
 * NOTE(review): the remaining parameter line, some local declarations, the
 * statements that follow the DD test and the ring wrap-around test, and the
 * return statements are on lines not visible in this copy - confirm against
 * the complete file.
 */
115 fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
118 struct rte_mbuf *mbuf;
119 union fm10k_rx_desc desc;
120 struct fm10k_rx_queue *q = rx_queue;
126 next_dd = q->next_dd;
/* Never harvest more than one refill batch per call. */
128 nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
129 for (count = 0; count < nb_pkts; ++count) {
130 mbuf = q->sw_ring[next_dd];
/* Work on a local copy of the descriptor. */
131 desc = q->hw_ring[next_dd];
132 if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
134 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
/* Single buffer per packet: packet length equals data length. */
137 rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
138 rte_pktmbuf_data_len(mbuf) = desc.w.length;
141 #ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
142 rx_desc_to_ol_flags(mbuf, &desc);
145 mbuf->hash.rss = desc.d.rss;
147 * Packets in fm10k device always carry at least one VLAN tag.
148 * For those packets coming in without VLAN tag,
149 * the port default VLAN tag will be used.
150 * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
151 * is valid for each RX packet's mbuf.
153 mbuf->ol_flags |= PKT_RX_VLAN_PKT;
154 mbuf->vlan_tci = desc.w.vlan;
156 rx_pkts[count] = mbuf;
157 if (++next_dd == q->nb_desc) {
162 /* Prefetch next mbuf while processing current one. */
163 rte_prefetch0(q->sw_ring[next_dd]);
166 * When next RX descriptor is on a cache-line boundary,
167 * prefetch the next 4 RX descriptors and the next 8 pointers
170 if ((next_dd & 0x3) == 0) {
171 rte_prefetch0(&q->hw_ring[next_dd]);
172 rte_prefetch0(&q->sw_ring[next_dd]);
176 q->next_dd = next_dd;
/* Refill once the consumer index has passed the trigger (or alloc is 1). */
178 if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
179 ret = rte_mempool_get_bulk(q->mp,
180 (void **)&q->sw_ring[q->next_alloc],
183 if (unlikely(ret != 0)) {
184 uint8_t port = q->port_id;
185 PMD_RX_LOG(ERR, "Failed to alloc mbuf");
187 * Need to restore next_dd if we cannot allocate new
188 * buffers to replenish the old ones.
190 q->next_dd = (q->next_dd + q->nb_desc - count) %
192 rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
/* Re-arm every slot from next_alloc up to and including next_trigger. */
196 for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
197 mbuf = q->sw_ring[q->next_alloc];
199 /* setup static mbuf fields */
200 fm10k_pktmbuf_reset(mbuf, q->port_id);
202 /* write descriptor */
203 desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
204 desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
205 q->hw_ring[q->next_alloc] = desc;
/* Publish the refilled range, then advance the trigger by one batch. */
207 FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
208 q->next_trigger += q->alloc_thresh;
209 if (q->next_trigger >= q->nb_desc) {
210 q->next_trigger = q->alloc_thresh - 1;
/*
 * Burst receive for packets that may span several RX buffers.
 *
 * Starting at q->next_dd, examines at most RTE_MIN(nb_pkts, q->alloc_thresh)
 * descriptors with the DD status bit set. Buffers are chained into a packet
 * whose head is first_seg; pkt_len and nb_segs are accumulated on the head.
 * When a descriptor carries the EOP status bit, the head mbuf receives its
 * offload flags, RSS hash and VLAN information and is stored in rx_pkts[].
 * A packet still incomplete at the end of the call is remembered in
 * q->pkt_first_seg / q->pkt_last_seg and resumed on the next call. Buffer
 * refill follows the same trigger scheme as the non-scattered path.
 *
 * rx_queue: RX queue, used as struct fm10k_rx_queue.
 * rx_pkts:  output array receiving completed packets (head mbufs).
 *
 * NOTE(review): the remaining parameter line, some local declarations, the
 * branch headers around the first/next segment handling, the statements
 * following the DD / EOP / wrap-around tests and the return statements are
 * on lines not visible in this copy - confirm against the complete file.
 */
219 fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
222 struct rte_mbuf *mbuf;
223 union fm10k_rx_desc desc;
224 struct fm10k_rx_queue *q = rx_queue;
226 uint16_t nb_rcv, nb_seg;
/* Resume a packet left unfinished by the previous call, if any. */
229 struct rte_mbuf *first_seg = q->pkt_first_seg;
230 struct rte_mbuf *last_seg = q->pkt_last_seg;
233 next_dd = q->next_dd;
/* Never consume more descriptors than one refill batch per call. */
236 nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
237 for (count = 0; count < nb_seg; count++) {
238 mbuf = q->sw_ring[next_dd];
239 desc = q->hw_ring[next_dd];
240 if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
242 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
246 if (++next_dd == q->nb_desc) {
251 /* Prefetch next mbuf while processing current one. */
252 rte_prefetch0(q->sw_ring[next_dd]);
255 * When next RX descriptor is on a cache-line boundary,
256 * prefetch the next 4 RX descriptors and the next 8 pointers
259 if ((next_dd & 0x3) == 0) {
260 rte_prefetch0(&q->hw_ring[next_dd]);
261 rte_prefetch0(&q->sw_ring[next_dd]);
264 /* Fill data length */
265 rte_pktmbuf_data_len(mbuf) = desc.w.length;
268 * If this is the first buffer of the received packet,
269 * set the pointer to the first mbuf of the packet and
270 * initialize its context.
271 * Otherwise, update the total length and the number of segments
272 * of the current scattered packet, and update the pointer to
273 * the last mbuf of the current packet.
277 first_seg->pkt_len = desc.w.length;
280 (uint16_t)(first_seg->pkt_len +
281 rte_pktmbuf_data_len(mbuf));
282 first_seg->nb_segs++;
283 last_seg->next = mbuf;
287 * If this is not the last buffer of the received packet,
288 * update the pointer to the last mbuf of the current scattered
289 * packet and continue to parse the RX ring.
291 if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
/* End of packet: all per-packet metadata goes on the head mbuf. */
296 first_seg->ol_flags = 0;
297 #ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
298 rx_desc_to_ol_flags(first_seg, &desc);
300 first_seg->hash.rss = desc.d.rss;
302 * Packets in fm10k device always carry at least one VLAN tag.
303 * For those packets coming in without VLAN tag,
304 * the port default VLAN tag will be used.
305 * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
306 * is valid for each RX packet's mbuf.
/*
 * The flag belongs on the head mbuf: that is the mbuf whose ol_flags were
 * just reset, whose vlan_tci is filled in and which is returned in
 * rx_pkts[]. Setting it on the current (last) segment would leave it
 * missing from multi-segment packets.
 */
308 first_seg->ol_flags |= PKT_RX_VLAN_PKT;
309 first_seg->vlan_tci = desc.w.vlan;
311 /* Prefetch data of first segment, if configured to do so. */
312 rte_packet_prefetch((char *)first_seg->buf_addr +
313 first_seg->data_off);
316 * Store the mbuf address into the next entry of the array
317 * of returned packets.
319 rx_pkts[nb_rcv++] = first_seg;
322 * Setup receipt context for a new packet.
327 q->next_dd = next_dd;
/* Refill once the consumer index has passed the trigger (or alloc is 1). */
329 if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
330 ret = rte_mempool_get_bulk(q->mp,
331 (void **)&q->sw_ring[q->next_alloc],
334 if (unlikely(ret != 0)) {
335 uint8_t port = q->port_id;
336 PMD_RX_LOG(ERR, "Failed to alloc mbuf");
338 * Need to restore next_dd if we cannot allocate new
339 * buffers to replenish the old ones.
341 q->next_dd = (q->next_dd + q->nb_desc - count) %
343 rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
/* Re-arm every slot from next_alloc up to and including next_trigger. */
347 for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
348 mbuf = q->sw_ring[q->next_alloc];
350 /* setup static mbuf fields */
351 fm10k_pktmbuf_reset(mbuf, q->port_id);
353 /* write descriptor */
354 desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
355 desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
356 q->hw_ring[q->next_alloc] = desc;
/* Publish the refilled range, then advance the trigger by one batch. */
358 FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
359 q->next_trigger += q->alloc_thresh;
360 if (q->next_trigger >= q->nb_desc) {
361 q->next_trigger = q->alloc_thresh - 1;
/* Save the (possibly unfinished) packet context for the next call. */
366 q->pkt_first_seg = first_seg;
367 q->pkt_last_seg = last_seg;
/*
 * Reclaim TX descriptors that the hardware has completed.
 *
 * Peeks the next descriptor index recorded in q->rs_tracker. If that
 * descriptor has FM10K_TXD_FLAG_DONE set, the index is removed from the
 * tracker and the mbuf segments held in sw_ring from q->last_free up to and
 * including that index are released with rte_pktmbuf_free_seg(); each
 * sw_ring slot is set to NULL and q->nb_free is increased by the number of
 * descriptors reclaimed. A range that wraps past the end of the ring is
 * processed in two passes (tail of the ring first, then from the start).
 *
 * q: TX queue to clean.
 *
 * NOTE(review): the statement taken when DONE is not set, the last_free
 * increments inside both loops and the resets of last_free are on lines not
 * visible in this copy - confirm against the complete file.
 */
372 static inline void tx_free_descriptors(struct fm10k_tx_queue *q)
374 uint16_t next_rs, count = 0;
376 next_rs = fifo_peek(&q->rs_tracker);
377 if (!(q->hw_ring[next_rs].flags & FM10K_TXD_FLAG_DONE))
380 /* the DONE flag is set on this descriptor so remove the ID
381 * from the RS bit tracker and free the buffers */
382 fifo_remove(&q->rs_tracker);
384 /* wrap around? if so, free buffers from last_free up to but NOT
385 * including nb_desc */
386 if (q->last_free > next_rs) {
/* Remember how many descriptors the tail-of-ring pass releases. */
387 count = q->nb_desc - q->last_free;
388 while (q->last_free < q->nb_desc) {
389 rte_pktmbuf_free_seg(q->sw_ring[q->last_free]);
390 q->sw_ring[q->last_free] = NULL;
396 /* adjust free descriptor count before the next loop */
397 q->nb_free += count + (next_rs + 1 - q->last_free);
399 /* free buffers from last_free, up to and including next_rs */
400 while (q->last_free <= next_rs) {
401 rte_pktmbuf_free_seg(q->sw_ring[q->last_free]);
402 q->sw_ring[q->last_free] = NULL;
406 if (q->last_free == q->nb_desc)
/*
 * Place one (possibly multi-segment) packet on the TX ring.
 *
 * One descriptor is written per mbuf segment, starting at q->next_free.
 * The first descriptor carries the checksum flag, VLAN tag and TSO fields
 * requested through mb->ol_flags. The descriptor at last_id (the final
 * segment) receives FM10K_TXD_FLAG_LAST and, when nb_used + nb_segs reaches
 * q->rs_thresh, FM10K_TXD_FLAG_RS as well; in that case last_id is also
 * queued in q->rs_tracker.
 *
 * q:  TX queue to write to.
 * mb: head mbuf of the packet. q->nb_free is decremented by mb->nb_segs
 *     without a check in the visible code, so the caller has to make sure
 *     enough descriptors are free.
 *
 * NOTE(review): some declarations, the other arm of the rs_thresh test, the
 * wrap-around statements after the next_free increments and the handling of
 * an out-of-range TSO header are on lines not visible in this copy.
 */
410 static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb)
413 uint8_t flags, hdrlen;
415 /* always set the LAST flag on the last descriptor used to
416 * transmit the packet */
417 flags = FM10K_TXD_FLAG_LAST;
/* Ring index of the descriptor that will hold the final segment. */
418 last_id = q->next_free + mb->nb_segs - 1;
419 if (last_id >= q->nb_desc)
420 last_id = last_id - q->nb_desc;
422 /* but only set the RS flag on the last descriptor if rs_thresh
423 * descriptors will be used since the RS flag was last set */
424 if ((q->nb_used + mb->nb_segs) >= q->rs_thresh) {
425 flags |= FM10K_TXD_FLAG_RS;
426 fifo_insert(&q->rs_tracker, last_id);
429 q->nb_used = q->nb_used + mb->nb_segs;
432 q->nb_free -= mb->nb_segs;
434 q->hw_ring[q->next_free].flags = 0;
435 /* set checksum flags on first descriptor of packet. SCTP checksum
436 * offload is not supported, but we do not explicitly check for this
437 * case in favor of greatly simplified processing. */
438 if (mb->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
439 q->hw_ring[q->next_free].flags |= FM10K_TXD_FLAG_CSUM;
441 /* set vlan if requested */
442 if (mb->ol_flags & PKT_TX_VLAN_PKT)
443 q->hw_ring[q->next_free].vlan = mb->vlan_tci;
/* First segment: remember the mbuf and describe its buffer. */
445 q->sw_ring[q->next_free] = mb;
446 q->hw_ring[q->next_free].buffer_addr =
447 rte_cpu_to_le_64(MBUF_DMA_ADDR(mb));
448 q->hw_ring[q->next_free].buflen =
449 rte_cpu_to_le_16(rte_pktmbuf_data_len(mb));
451 if (mb->ol_flags & PKT_TX_TCP_SEG) {
/* NOTE(review): hdrlen is 8 bits wide; the sum below can wrap - confirm. */
452 hdrlen = mb->outer_l2_len + mb->outer_l3_len + mb->l2_len +
453 mb->l3_len + mb->l4_len;
/* NOTE(review): flags was zeroed above and only CSUM is OR-ed in within the
 * visible code, so check whether this FTAG test can ever be true. */
454 if (q->hw_ring[q->next_free].flags & FM10K_TXD_FLAG_FTAG)
455 hdrlen += sizeof(struct fm10k_ftag);
/* TSO fields are written only when header length and MSS are in range. */
457 if (likely((hdrlen >= FM10K_TSO_MIN_HEADERLEN) &&
458 (hdrlen <= FM10K_TSO_MAX_HEADERLEN) &&
459 (mb->tso_segsz >= FM10K_TSO_MINMSS))) {
460 q->hw_ring[q->next_free].mss = mb->tso_segsz;
461 q->hw_ring[q->next_free].hdrlen = hdrlen;
465 if (++q->next_free == q->nb_desc)
468 /* fill up the rings */
469 for (mb = mb->next; mb != NULL; mb = mb->next) {
470 q->sw_ring[q->next_free] = mb;
471 q->hw_ring[q->next_free].buffer_addr =
472 rte_cpu_to_le_64(MBUF_DMA_ADDR(mb));
473 q->hw_ring[q->next_free].buflen =
474 rte_cpu_to_le_16(rte_pktmbuf_data_len(mb));
475 q->hw_ring[q->next_free].flags = 0;
476 if (++q->next_free == q->nb_desc)
/* Finally mark the last descriptor with LAST (and RS when due). */
480 q->hw_ring[last_id].flags |= flags;
/*
 * Burst transmit entry point.
 *
 * Iterates over up to nb_pkts packets. For each one it first reclaims
 * completed descriptors via tx_free_descriptors() when q->nb_free has
 * dropped below q->free_thresh, then checks that q->nb_free covers the
 * packet's nb_segs, then sanity-checks the mbuf (nb_segs must be non-zero,
 * and a packet with more than one segment must have a non-NULL next
 * pointer). After the loop the tail pointer register is written with
 * q->next_free if at least one packet was processed.
 *
 * tx_queue: TX queue, used as struct fm10k_tx_queue.
 * tx_pkts:  array of packets to send.
 *
 * NOTE(review): the remaining parameter line, the per-packet fetch of mb,
 * the statements taken when a check fails, the call that queues the packet
 * and the return statement are on lines not visible in this copy - confirm
 * against the complete file.
 */
484 fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
487 struct fm10k_tx_queue *q = tx_queue;
491 for (count = 0; count < nb_pkts; ++count) {
494 /* running low on descriptors? try to free some... */
495 if (q->nb_free < q->free_thresh)
496 tx_free_descriptors(q);
498 /* make sure there are enough free descriptors to transmit the
499 * entire packet before doing anything */
500 if (q->nb_free < mb->nb_segs)
503 /* sanity check to make sure the mbuf is valid */
504 if ((mb->nb_segs == 0) ||
505 ((mb->nb_segs > 1) && (mb->next == NULL)))
508 /* process the packet */
512 /* update the tail pointer if any packets were processed */
513 if (likely(count > 0))
514 FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_free);