4 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <rte_ethdev.h>
37 #include <rte_common.h>
40 #include "base/fm10k_type.h"
/*
 * rte_packet_prefetch(p): optional prefetch of packet data.
 * The first definition, under RTE_PMD_PACKET_PREFETCH, forwards to
 * rte_prefetch1(p); the second definition expands to an empty statement.
 * NOTE(review): the preprocessor lines that separate and close the two
 * definitions are not visible in this excerpt - confirm against the full file.
 */
42 #ifdef RTE_PMD_PACKET_PREFETCH
43 #define rte_packet_prefetch(p) rte_prefetch1(p)
45 #define rte_packet_prefetch(p) do {} while (0)
/*
 * Debug helper, placed under RTE_LIBRTE_FM10K_DEBUG_RX: logs one RX
 * descriptor at DEBUG level as an ASCII table with rows labelled GLORT /
 * PKT HDR & TYPE, VLAN & LEN / STATUS, RESERVED / RSS_HASH and TIME TAG.
 *
 * @rxd: descriptor to dump; it is only read.
 *
 * NOTE(review): the second value argument of the GLORT row and of the
 * VLAN & LEN row sits on continuation lines not visible in this excerpt.
 */
48 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
49 static inline void dump_rxd(union fm10k_rx_desc *rxd)
51 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
52 PMD_RX_LOG(DEBUG, "| GLORT | PKT HDR & TYPE |");
53 PMD_RX_LOG(DEBUG, "| 0x%08x | 0x%08x |", rxd->d.glort,
55 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
56 PMD_RX_LOG(DEBUG, "| VLAN & LEN | STATUS |");
57 PMD_RX_LOG(DEBUG, "| 0x%08x | 0x%08x |", rxd->d.vlan_len,
59 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
60 PMD_RX_LOG(DEBUG, "| RESERVED | RSS_HASH |");
/* the RESERVED column is always printed as the constant 0 */
61 PMD_RX_LOG(DEBUG, "| 0x%08x | 0x%08x |", 0, rxd->d.rss);
62 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
63 PMD_RX_LOG(DEBUG, "| TIME TAG |");
/* the timestamp is printed through PRIx64 as 16 hex digits */
64 PMD_RX_LOG(DEBUG, "| 0x%016"PRIx64" |", rxd->q.timestamp);
65 PMD_RX_LOG(DEBUG, "+----------------|----------------+");
/*
 * FM10K_TX_OFFLOAD_MASK: mask of TX offload flags used by this driver.
 * NOTE(review): the flag list that forms its body is on continuation
 * lines not visible in this excerpt.
 *
 * FM10K_TX_OFFLOAD_NOTSUP_MASK: XOR of the generic PKT_TX_OFFLOAD_MASK
 * with FM10K_TX_OFFLOAD_MASK. This equals "generic offload bits the driver
 * mask lacks" only when the driver mask is a subset of the generic mask -
 * presumably the case; verify against the full definition.
 */
69 #define FM10K_TX_OFFLOAD_MASK ( \
75 #define FM10K_TX_OFFLOAD_NOTSUP_MASK \
76 (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
/* @note: When this function is changed, make the corresponding change to
 * fm10k_dev_supported_ptypes_get().
 */
/*
 * Translate the type and status fields of an RX descriptor into mbuf
 * metadata.
 *
 * @m: mbuf whose packet_type is assigned and whose ol_flags are OR-ed into.
 * @d: RX descriptor to decode; it is only read.
 *
 * Steps visible here:
 *  - packet_type comes from a lookup table indexed by the packet-type bits
 *    of d->w.pkt_info; entries without an initializer are zero.
 *  - PKT_RX_RSS_HASH is set when any RSS-type bit of pkt_info is non-zero.
 *  - PKT_RX_IP_CKSUM_BAD is set when both IPCS and IPE are set in staterr;
 *    PKT_RX_L4_CKSUM_BAD likewise for L4CS and L4E.
 *
 * NOTE(review): the lines between each *_BAD and *_GOOD assignment are not
 * visible in this excerpt; presumably an else joins them - confirm.
 *
 * NOTE(review): the table is declared with
 * (FM10K_RXD_PKTTYPE_MASK >> FM10K_RXD_PKTTYPE_SHIFT) elements, yet the
 * index expression can evaluate to that same value when every packet-type
 * bit is set, which would read one element past the end. Confirm whether
 * hardware can report that value or size the table with "+ 1".
 */
82 rx_desc_to_ol_flags(struct rte_mbuf *m, const union fm10k_rx_desc *d)
85 ptype_table[FM10K_RXD_PKTTYPE_MASK >> FM10K_RXD_PKTTYPE_SHIFT]
86 __rte_cache_aligned = {
/* L2-only and L3-only packet types */
87 [FM10K_PKTTYPE_OTHER] = RTE_PTYPE_L2_ETHER,
88 [FM10K_PKTTYPE_IPV4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4,
89 [FM10K_PKTTYPE_IPV4_EX] = RTE_PTYPE_L2_ETHER |
90 RTE_PTYPE_L3_IPV4_EXT,
91 [FM10K_PKTTYPE_IPV6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6,
92 [FM10K_PKTTYPE_IPV6_EX] = RTE_PTYPE_L2_ETHER |
93 RTE_PTYPE_L3_IPV6_EXT,
/* L3 type combined with a TCP or UDP L4 type */
94 [FM10K_PKTTYPE_IPV4 | FM10K_PKTTYPE_TCP] = RTE_PTYPE_L2_ETHER |
95 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
96 [FM10K_PKTTYPE_IPV6 | FM10K_PKTTYPE_TCP] = RTE_PTYPE_L2_ETHER |
97 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
98 [FM10K_PKTTYPE_IPV4 | FM10K_PKTTYPE_UDP] = RTE_PTYPE_L2_ETHER |
99 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
100 [FM10K_PKTTYPE_IPV6 | FM10K_PKTTYPE_UDP] = RTE_PTYPE_L2_ETHER |
101 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
/* extract the packet-type bits of pkt_info and use them as table index */
104 m->packet_type = ptype_table[(d->w.pkt_info & FM10K_RXD_PKTTYPE_MASK)
105 >> FM10K_RXD_PKTTYPE_SHIFT];
/* any non-zero RSS type marks the hash as present */
107 if (d->w.pkt_info & FM10K_RXD_RSSTYPE_MASK)
108 m->ol_flags |= PKT_RX_RSS_HASH;
/* IP checksum is bad only when both IPCS and IPE are set */
110 if (unlikely((d->d.staterr &
111 (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)) ==
112 (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)))
113 m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
115 m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
/* L4 checksum is bad only when both L4CS and L4E are set */
117 if (unlikely((d->d.staterr &
118 (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)) ==
119 (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)))
120 m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
122 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
/*
 * Burst receive for the case where each packet occupies one RX descriptor.
 *
 * @rx_queue: opaque pointer, used as a struct fm10k_rx_queue.
 * @rx_pkts: output array; one completed mbuf is stored per slot.
 * NOTE(review): the rest of the parameter list (nb_pkts is used below) and
 * the return statement are on lines not visible in this excerpt.
 *
 * Receive phase: starting at q->next_dd, walk at most
 * min(nb_pkts, q->alloc_thresh) descriptors. A descriptor is consumed only
 * when its DD status bit is set. For each one the descriptor is copied to a
 * local, pkt_len and data_len are set from desc.w.length, offload flags are
 * decoded when RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE is defined, the RSS hash,
 * VLAN tag (PKT_RX_VLAN_PKT plus vlan_tci) and source glort (stored in
 * vlan_tci_outer) are filled in, and the mbuf is placed in rx_pkts[count].
 * The ring index is advanced with a wrap check against q->nb_desc, the next
 * software-ring mbuf is prefetched, and when the new index is a multiple of
 * four the hw_ring and sw_ring entries at that index are prefetched too.
 *
 * Refill phase: entered when q->next_dd is greater than q->next_trigger or
 * when the local flag alloc equals 1 (its assignment is not visible here).
 * Mbufs are bulk-taken from q->mp into sw_ring starting at q->next_alloc.
 * On failure an error is logged, q->next_dd is rewound by count (modulo a
 * divisor on a line not shown) and the port rx_mbuf_alloc_failed counter is
 * incremented. On success each new mbuf is reset with fm10k_pktmbuf_reset
 * and its default DMA address is written to both pkt_addr and hdr_addr of
 * the matching hardware descriptor, up to and including q->next_trigger.
 * The tail register is then written with q->next_trigger, which advances by
 * q->alloc_thresh and is set to alloc_thresh - 1 once it reaches nb_desc.
 */
126 fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
129 struct rte_mbuf *mbuf;
130 union fm10k_rx_desc desc;
131 struct fm10k_rx_queue *q = rx_queue;
137 next_dd = q->next_dd;
/* never hand back more than one refill batch per call */
139 nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
140 for (count = 0; count < nb_pkts; ++count) {
/* DD clear: this descriptor has not been completed yet */
141 if (!(q->hw_ring[next_dd].d.staterr & FM10K_RXD_STATUS_DD))
143 mbuf = q->sw_ring[next_dd];
/* work on a local copy of the descriptor from here on */
144 desc = q->hw_ring[next_dd];
145 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
/* single-buffer packet: total length and segment length are equal */
148 rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
149 rte_pktmbuf_data_len(mbuf) = desc.w.length;
152 #ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
153 rx_desc_to_ol_flags(mbuf, &desc);
156 mbuf->hash.rss = desc.d.rss;
158 * Packets in fm10k device always carry at least one VLAN tag.
159 * For those packets coming in without VLAN tag,
160 * the port default VLAN tag will be used.
161 * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
162 * is valid for each RX packet's mbuf.
164 mbuf->ol_flags |= PKT_RX_VLAN_PKT;
165 mbuf->vlan_tci = desc.w.vlan;
167 * mbuf->vlan_tci_outer is an idle field in fm10k driver,
168 * so it can be selected to store sglort value.
171 mbuf->vlan_tci_outer = rte_le_to_cpu_16(desc.w.sglort);
173 rx_pkts[count] = mbuf;
174 if (++next_dd == q->nb_desc) {
179 /* Prefetch next mbuf while processing current one. */
180 rte_prefetch0(q->sw_ring[next_dd]);
183 * When next RX descriptor is on a cache-line boundary,
184 * prefetch the next 4 RX descriptors and the next 8 pointers
187 if ((next_dd & 0x3) == 0) {
188 rte_prefetch0(&q->hw_ring[next_dd]);
189 rte_prefetch0(&q->sw_ring[next_dd]);
193 q->next_dd = next_dd;
195 if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
196 ret = rte_mempool_get_bulk(q->mp,
197 (void **)&q->sw_ring[q->next_alloc],
200 if (unlikely(ret != 0)) {
201 uint8_t port = q->port_id;
202 PMD_RX_LOG(ERR, "Failed to alloc mbuf");
204 * Need to restore next_dd if we cannot allocate new
205 * buffers to replenish the old ones.
207 q->next_dd = (q->next_dd + q->nb_desc - count) %
209 rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
213 for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
214 mbuf = q->sw_ring[q->next_alloc];
216 /* setup static mbuf fields */
217 fm10k_pktmbuf_reset(mbuf, q->port_id);
219 /* write descriptor */
220 desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
221 desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
222 q->hw_ring[q->next_alloc] = desc;
224 FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
225 q->next_trigger += q->alloc_thresh;
226 if (q->next_trigger >= q->nb_desc) {
227 q->next_trigger = q->alloc_thresh - 1;
/*
 * Burst receive for packets that may span several RX descriptors.
 *
 * @rx_queue: opaque pointer, used as a struct fm10k_rx_queue.
 * @rx_pkts: output array; the first segment of each completed packet is
 *           stored at rx_pkts[nb_rcv++].
 * NOTE(review): the rest of the parameter list (nb_pkts is used below) and
 * the return statement are on lines not visible in this excerpt.
 *
 * Reassembly state (first and last segment of a packet in progress) is
 * loaded from q->pkt_first_seg / q->pkt_last_seg on entry and stored back
 * at the end, so a packet may be completed by a later call.
 *
 * Receive phase: at most min(nb_pkts, q->alloc_thresh) descriptors are
 * examined, each only if its DD status bit is set. The ring index is
 * advanced (wrap check against q->nb_desc) and prefetches are issued before
 * the segment is processed. data_len of the segment is set from
 * desc.w.length. For a first segment, pkt_len starts at desc.w.length;
 * for a later segment, pkt_len grows by data_len, nb_segs is incremented
 * and the segment is linked via last_seg->next. Unless the EOP status bit
 * is set the packet is not yet complete. On EOP, ol_flags of the first
 * segment are cleared and refilled (offload decoding under
 * RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE, PKT_RX_VLAN_PKT), rss, vlan_tci and
 * vlan_tci_outer (source glort) are set from the last descriptor, the
 * packet data is optionally prefetched and the packet is returned.
 * NOTE(review): the first-versus-later segment branch and the body of the
 * not-EOP branch are on lines not visible in this excerpt.
 *
 * Refill phase: same scheme as the non-scattered receive path - triggered
 * when q->next_dd is greater than q->next_trigger or alloc equals 1; mbufs
 * are bulk-taken from q->mp; on failure next_dd is rewound by count and
 * rx_mbuf_alloc_failed is incremented; on success descriptors up to and
 * including next_trigger are rewritten, the tail register is updated and
 * next_trigger advances by alloc_thresh, being set to alloc_thresh - 1
 * once it reaches nb_desc.
 */
236 fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
239 struct rte_mbuf *mbuf;
240 union fm10k_rx_desc desc;
241 struct fm10k_rx_queue *q = rx_queue;
243 uint16_t nb_rcv, nb_seg;
/* resume any packet left incomplete by the previous call */
246 struct rte_mbuf *first_seg = q->pkt_first_seg;
247 struct rte_mbuf *last_seg = q->pkt_last_seg;
250 next_dd = q->next_dd;
/* the bound counts descriptors (segments), not whole packets */
253 nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
254 for (count = 0; count < nb_seg; count++) {
/* DD clear: this descriptor has not been completed yet */
255 if (!(q->hw_ring[next_dd].d.staterr & FM10K_RXD_STATUS_DD))
257 mbuf = q->sw_ring[next_dd];
258 desc = q->hw_ring[next_dd];
259 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
263 if (++next_dd == q->nb_desc) {
268 /* Prefetch next mbuf while processing current one. */
269 rte_prefetch0(q->sw_ring[next_dd]);
272 * When next RX descriptor is on a cache-line boundary,
273 * prefetch the next 4 RX descriptors and the next 8 pointers
276 if ((next_dd & 0x3) == 0) {
277 rte_prefetch0(&q->hw_ring[next_dd]);
278 rte_prefetch0(&q->sw_ring[next_dd]);
281 /* Fill data length */
282 rte_pktmbuf_data_len(mbuf) = desc.w.length;
285 * If this is the first buffer of the received packet,
286 * set the pointer to the first mbuf of the packet and
287 * initialize its context.
288 * Otherwise, update the total length and the number of segments
289 * of the current scattered packet, and update the pointer to
290 * the last mbuf of the current packet.
294 first_seg->pkt_len = desc.w.length;
/* the accumulated packet length is truncated to 16 bits by the cast */
297 (uint16_t)(first_seg->pkt_len +
298 rte_pktmbuf_data_len(mbuf));
299 first_seg->nb_segs++;
300 last_seg->next = mbuf;
304 * If this is not the last buffer of the received packet,
305 * update the pointer to the last mbuf of the current scattered
306 * packet and continue to parse the RX ring.
308 if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
/* packet complete: metadata is taken from this final descriptor */
313 first_seg->ol_flags = 0;
314 #ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
315 rx_desc_to_ol_flags(first_seg, &desc);
317 first_seg->hash.rss = desc.d.rss;
319 * Packets in fm10k device always carry at least one VLAN tag.
320 * For those packets coming in without VLAN tag,
321 * the port default VLAN tag will be used.
322 * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
323 * is valid for each RX packet's mbuf.
325 first_seg->ol_flags |= PKT_RX_VLAN_PKT;
326 first_seg->vlan_tci = desc.w.vlan;
328 * mbuf->vlan_tci_outer is an idle field in fm10k driver,
329 * so it can be selected to store sglort value.
332 first_seg->vlan_tci_outer =
333 rte_le_to_cpu_16(desc.w.sglort);
335 /* Prefetch data of first segment, if configured to do so. */
336 rte_packet_prefetch((char *)first_seg->buf_addr +
337 first_seg->data_off);
340 * Store the mbuf address into the next entry of the array
341 * of returned packets.
343 rx_pkts[nb_rcv++] = first_seg;
346 * Setup receipt context for a new packet.
351 q->next_dd = next_dd;
353 if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
354 ret = rte_mempool_get_bulk(q->mp,
355 (void **)&q->sw_ring[q->next_alloc],
358 if (unlikely(ret != 0)) {
359 uint8_t port = q->port_id;
360 PMD_RX_LOG(ERR, "Failed to alloc mbuf");
362 * Need to restore next_dd if we cannot allocate new
363 * buffers to replenish the old ones.
365 q->next_dd = (q->next_dd + q->nb_desc - count) %
367 rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
371 for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
372 mbuf = q->sw_ring[q->next_alloc];
374 /* setup static mbuf fields */
375 fm10k_pktmbuf_reset(mbuf, q->port_id);
377 /* write descriptor */
378 desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
379 desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
380 q->hw_ring[q->next_alloc] = desc;
382 FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
383 q->next_trigger += q->alloc_thresh;
384 if (q->next_trigger >= q->nb_desc) {
385 q->next_trigger = q->alloc_thresh - 1;
390 q->pkt_first_seg = first_seg;
391 q->pkt_last_seg = last_seg;
/*
 * Report whether the RX descriptor located `offset` entries after the
 * queue's next descriptor to be processed has its DD status bit set.
 *
 * @rx_queue: opaque pointer, used as a struct fm10k_rx_queue.
 * @offset: distance from rxq->next_dd; must be smaller than rxq->nb_desc,
 *          otherwise an error is logged.
 *
 * The result is normalised to 0 or 1 with a double negation.
 * NOTE(review): the return statements, including the value returned for an
 * invalid offset, are on lines not visible in this excerpt.
 */
397 fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
399 volatile union fm10k_rx_desc *rxdp;
400 struct fm10k_rx_queue *rxq = rx_queue;
404 if (unlikely(offset >= rxq->nb_desc)) {
405 PMD_DRV_LOG(ERR, "Invalid RX descriptor offset %u", offset);
/* ring index of the requested descriptor, wrapped into [0, nb_desc) */
409 desc = rxq->next_dd + offset;
410 if (desc >= rxq->nb_desc)
411 desc -= rxq->nb_desc;
413 rxdp = &rxq->hw_ring[desc];
/* the DD mask is converted to little-endian before testing the raw status word */
415 ret = !!(rxdp->w.status &
416 rte_cpu_to_le_16(FM10K_RXD_STATUS_DD));
/*
 * Free multiple TX mbufs at a time if they are in the same pool.
 *
 * @txep: pointer to the first software ring entry to free
 * @num: number of descriptors to free
 */
/*
 * Implementation notes for the bulk TX mbuf release.
 *
 * Each entry is passed through __rte_pktmbuf_prefree_seg(); a NULL result
 * means that segment is not returned to a pool here. When the first entry
 * yields a non-NULL mbuf, later mbufs are gathered in the local array
 * free[] while their pool matches free[0]->pool; on a pool mismatch the
 * gathered batch is handed back with rte_mempool_put_bulk(), and whatever
 * remains at the end is handed back the same way. The second loop returns
 * entries one at a time with rte_mempool_put().
 *
 * NOTE(review): the array bookkeeping (nb_free updates, the else branches)
 * and any NULL check in the second loop are on lines not visible here.
 * NOTE(review): free[] holds RTE_FM10K_TX_MAX_FREE_BUF_SZ pointers and no
 * bound on num is visible in this excerpt - confirm that callers never
 * pass more entries than that, or that the hidden lines flush in time.
 */
428 static inline void tx_free_bulk_mbuf(struct rte_mbuf **txep, int num)
430 struct rte_mbuf *m, *free[RTE_FM10K_TX_MAX_FREE_BUF_SZ];
/* nothing to release */
434 if (unlikely(num == 0))
437 m = __rte_pktmbuf_prefree_seg(txep[0]);
438 if (likely(m != NULL)) {
441 for (i = 1; i < num; i++) {
442 m = __rte_pktmbuf_prefree_seg(txep[i]);
443 if (likely(m != NULL)) {
/* same pool as the batch head: keeps the batch homogeneous */
444 if (likely(m->pool == free[0]->pool))
447 rte_mempool_put_bulk(free[0]->pool,
448 (void *)free, nb_free);
/* hand back whatever is still gathered */
455 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
/* per-entry release path */
457 for (i = 1; i < num; i++) {
458 m = __rte_pktmbuf_prefree_seg(txep[i]);
460 rte_mempool_put(m->pool, m);
/*
 * Reclaim TX descriptors, and their mbufs, completed by the hardware.
 *
 * @q: TX queue; rs_tracker, last_free and nb_free are updated.
 *
 * The oldest descriptor index recorded in q->rs_tracker is peeked and its
 * DONE flag is tested. Once DONE is seen the index is removed from the
 * tracker and the mbufs from q->last_free up to and including that index
 * are released: if last_free lies beyond the index, the tail of the ring
 * (last_free up to but not including nb_desc) is released first, then the
 * range from last_free through the index. q->nb_free grows by the number
 * of descriptors reclaimed.
 *
 * NOTE(review): the early exit taken when DONE is clear, and the resets of
 * last_free after the wrap and when it reaches nb_desc, are on lines not
 * visible in this excerpt.
 */
466 static inline void tx_free_descriptors(struct fm10k_tx_queue *q)
468 uint16_t next_rs, count = 0;
470 next_rs = fifo_peek(&q->rs_tracker);
471 if (!(q->hw_ring[next_rs].flags & FM10K_TXD_FLAG_DONE))
474 /* the DONE flag is set on this descriptor so remove the ID
475 * from the RS bit tracker and free the buffers */
476 fifo_remove(&q->rs_tracker);
478 /* wrap around? if so, free buffers from last_free up to but NOT
479 * including nb_desc */
480 if (q->last_free > next_rs) {
481 count = q->nb_desc - q->last_free;
482 tx_free_bulk_mbuf(&q->sw_ring[q->last_free], count);
486 /* adjust free descriptor count before the next loop */
487 q->nb_free += count + (next_rs + 1 - q->last_free);
489 /* free buffers from last_free, up to and including next_rs */
490 if (q->last_free <= next_rs) {
491 count = next_rs - q->last_free + 1;
492 tx_free_bulk_mbuf(&q->sw_ring[q->last_free], count);
493 q->last_free += count;
496 if (q->last_free == q->nb_desc)
/*
 * Write the TX descriptors for one packet (an mbuf chain) into the ring.
 *
 * @q: TX queue; next_free, nb_used, nb_free, rs_tracker, sw_ring and
 *     hw_ring are updated.
 * @mb: first segment of the packet; one descriptor is used per segment
 *      (mb->nb_segs in total).
 *
 * The index of the packet's last descriptor is computed up front with a
 * wrap against q->nb_desc. That descriptor always receives the LAST flag,
 * and also the RS flag (with its index recorded in q->rs_tracker) once
 * nb_used plus this packet's segments reaches q->rs_thresh. The first
 * descriptor carries the per-packet settings: CSUM when any of
 * PKT_TX_IP_CKSUM, PKT_TX_L4_MASK or PKT_TX_TCP_SEG is requested, the VLAN
 * tag when PKT_TX_VLAN_PKT is set, and for TSO the mss and hdrlen fields,
 * written only when the header length and segment size pass the
 * FM10K_TSO_* range checks. Every descriptor gets the little-endian DMA
 * address and data length of its segment, and the mbuf pointer is stored
 * in sw_ring for later release.
 *
 * NOTE(review): the guard in front of the FTAG flag, the branch that
 * resets nb_used after RS is set, the else branch of the TSO range check
 * and the next_free wrap assignments are on lines not visible here.
 * NOTE(review): hdrlen is a uint8_t that accumulates five mbuf length
 * fields plus, optionally, sizeof(struct fm10k_ftag). If the sum can exceed
 * 255 it wraps before being compared with FM10K_TSO_MAX_HEADERLEN -
 * confirm the field widths or widen the variable.
 */
500 static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb)
503 uint8_t flags, hdrlen;
505 /* always set the LAST flag on the last descriptor used to
506 * transmit the packet */
507 flags = FM10K_TXD_FLAG_LAST;
508 last_id = q->next_free + mb->nb_segs - 1;
509 if (last_id >= q->nb_desc)
510 last_id = last_id - q->nb_desc;
512 /* but only set the RS flag on the last descriptor if rs_thresh
513 * descriptors will be used since the RS flag was last set */
514 if ((q->nb_used + mb->nb_segs) >= q->rs_thresh) {
515 flags |= FM10K_TXD_FLAG_RS;
516 fifo_insert(&q->rs_tracker, last_id);
519 q->nb_used = q->nb_used + mb->nb_segs;
522 q->nb_free -= mb->nb_segs;
/* start the first descriptor from a clean flags field */
524 q->hw_ring[q->next_free].flags = 0;
526 q->hw_ring[q->next_free].flags |= FM10K_TXD_FLAG_FTAG;
527 /* set checksum flags on first descriptor of packet. SCTP checksum
528 * offload is not supported, but we do not explicitly check for this
529 * case in favor of greatly simplified processing. */
530 if (mb->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
531 q->hw_ring[q->next_free].flags |= FM10K_TXD_FLAG_CSUM;
533 /* set vlan if requested */
534 if (mb->ol_flags & PKT_TX_VLAN_PKT)
535 q->hw_ring[q->next_free].vlan = mb->vlan_tci;
537 q->sw_ring[q->next_free] = mb;
538 q->hw_ring[q->next_free].buffer_addr =
539 rte_cpu_to_le_64(MBUF_DMA_ADDR(mb));
540 q->hw_ring[q->next_free].buflen =
541 rte_cpu_to_le_16(rte_pktmbuf_data_len(mb));
/* TSO: total header length covers outer and inner L2/L3 plus L4 */
543 if (mb->ol_flags & PKT_TX_TCP_SEG) {
544 hdrlen = mb->outer_l2_len + mb->outer_l3_len + mb->l2_len +
545 mb->l3_len + mb->l4_len;
/* the ftag, when flagged on this descriptor, counts as header bytes */
546 if (q->hw_ring[q->next_free].flags & FM10K_TXD_FLAG_FTAG)
547 hdrlen += sizeof(struct fm10k_ftag);
549 if (likely((hdrlen >= FM10K_TSO_MIN_HEADERLEN) &&
550 (hdrlen <= FM10K_TSO_MAX_HEADERLEN) &&
551 (mb->tso_segsz >= FM10K_TSO_MINMSS))) {
552 q->hw_ring[q->next_free].mss = mb->tso_segsz;
553 q->hw_ring[q->next_free].hdrlen = hdrlen;
557 if (++q->next_free == q->nb_desc)
560 /* fill up the rings */
561 for (mb = mb->next; mb != NULL; mb = mb->next) {
562 q->sw_ring[q->next_free] = mb;
563 q->hw_ring[q->next_free].buffer_addr =
564 rte_cpu_to_le_64(MBUF_DMA_ADDR(mb));
565 q->hw_ring[q->next_free].buflen =
566 rte_cpu_to_le_16(rte_pktmbuf_data_len(mb));
567 q->hw_ring[q->next_free].flags = 0;
568 if (++q->next_free == q->nb_desc)
/* OR-ed in last so it also applies when first and last descriptor coincide */
572 q->hw_ring[last_id].flags |= flags;
/*
 * Burst transmit entry point.
 *
 * @tx_queue: opaque pointer, used as a struct fm10k_tx_queue.
 * @tx_pkts: array of packets to send.
 * NOTE(review): the rest of the parameter list (nb_pkts is used below),
 * the per-iteration load of mb, the statements that leave the loop, the
 * call that processes a packet and the return statement are on lines not
 * visible in this excerpt.
 *
 * For each packet: when the free-descriptor count is below q->free_thresh,
 * completed descriptors are reclaimed first. The packet is then checked
 * against the number of free descriptors, so that it is only processed
 * when every segment fits, and against a basic sanity rule (nb_segs is
 * non-zero, and a multi-segment mbuf has a non-NULL next pointer). After
 * the loop the tail register is written with q->next_free, but only when
 * count is greater than zero.
 */
576 fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
579 struct fm10k_tx_queue *q = tx_queue;
583 for (count = 0; count < nb_pkts; ++count) {
586 /* running low on descriptors? try to free some... */
587 if (q->nb_free < q->free_thresh)
588 tx_free_descriptors(q);
590 /* make sure there are enough free descriptors to transmit the
591 * entire packet before doing anything */
592 if (q->nb_free < mb->nb_segs)
595 /* sanity check to make sure the mbuf is valid */
596 if ((mb->nb_segs == 0) ||
597 ((mb->nb_segs > 1) && (mb->next == NULL)))
600 /* process the packet */
604 /* update the tail pointer if any packets were processed */
605 if (likely(count > 0))
606 FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_free);
612 fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
618 for (i = 0; i < nb_pkts; i++) {
621 if ((m->ol_flags & PKT_TX_TCP_SEG) &&
622 (m->tso_segsz < FM10K_TSO_MINMSS)) {
627 if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
628 rte_errno = -ENOTSUP;
632 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
633 ret = rte_validate_tx_offload(m);
639 ret = rte_net_intel_cksum_prepare(m);