/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <inttypes.h>

#include <rte_ethdev.h>
#include <rte_common.h>
#include "fm10k.h"
#include "base/fm10k_type.h"

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while (0)
#endif

#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
static inline void dump_rxd(union fm10k_rx_desc *rxd)
{
        PMD_RX_LOG(DEBUG, "+----------------|----------------+");
        PMD_RX_LOG(DEBUG, "|     GLORT      | PKT HDR & TYPE |");
        PMD_RX_LOG(DEBUG, "|   0x%08x   |   0x%08x   |", rxd->d.glort,
                        rxd->d.data);
        PMD_RX_LOG(DEBUG, "+----------------|----------------+");
        PMD_RX_LOG(DEBUG, "|   VLAN & LEN   |     STATUS     |");
        PMD_RX_LOG(DEBUG, "|   0x%08x   |   0x%08x   |", rxd->d.vlan_len,
                        rxd->d.staterr);
        PMD_RX_LOG(DEBUG, "+----------------|----------------+");
        PMD_RX_LOG(DEBUG, "|    RESERVED    |    RSS_HASH    |");
        PMD_RX_LOG(DEBUG, "|   0x%08x   |   0x%08x   |", 0, rxd->d.rss);
        PMD_RX_LOG(DEBUG, "+----------------|----------------+");
        PMD_RX_LOG(DEBUG, "|            TIME TAG             |");
        PMD_RX_LOG(DEBUG, "|       0x%016"PRIx64"        |", rxd->q.timestamp);
        PMD_RX_LOG(DEBUG, "+----------------|----------------+");
}
#endif

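/*
 * Translate a hardware Rx descriptor into mbuf metadata: look up the packet
 * type from the descriptor's packet-type field and set the RSS, checksum
 * error and receive error offload flags reported by hardware.
 */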
static inline void
rx_desc_to_ol_flags(struct rte_mbuf *m, const union fm10k_rx_desc *d)
{
        static const uint32_t
                ptype_table[FM10K_RXD_PKTTYPE_MASK >> FM10K_RXD_PKTTYPE_SHIFT]
                        __rte_cache_aligned = {
                [FM10K_PKTTYPE_OTHER] = RTE_PTYPE_L2_ETHER,
                [FM10K_PKTTYPE_IPV4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4,
                [FM10K_PKTTYPE_IPV4_EX] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT,
                [FM10K_PKTTYPE_IPV6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6,
                [FM10K_PKTTYPE_IPV6_EX] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT,
                [FM10K_PKTTYPE_IPV4 | FM10K_PKTTYPE_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
                [FM10K_PKTTYPE_IPV6 | FM10K_PKTTYPE_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
                [FM10K_PKTTYPE_IPV4 | FM10K_PKTTYPE_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
                [FM10K_PKTTYPE_IPV6 | FM10K_PKTTYPE_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
        };

        m->packet_type = ptype_table[(d->w.pkt_info & FM10K_RXD_PKTTYPE_MASK)
                                                >> FM10K_RXD_PKTTYPE_SHIFT];

        if (d->w.pkt_info & FM10K_RXD_RSSTYPE_MASK)
                m->ol_flags |= PKT_RX_RSS_HASH;

        if (unlikely((d->d.staterr &
                (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)) ==
                (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)))
                m->ol_flags |= PKT_RX_IP_CKSUM_BAD;

        if (unlikely((d->d.staterr &
                (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)) ==
                (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)))
                m->ol_flags |= PKT_RX_L4_CKSUM_BAD;

        if (unlikely(d->d.staterr & FM10K_RXD_STATUS_HBO))
                m->ol_flags |= PKT_RX_HBUF_OVERFLOW;

        if (unlikely(d->d.staterr & FM10K_RXD_STATUS_RXE))
                m->ol_flags |= PKT_RX_RECIP_ERR;
}

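/*
 * Receive burst for the single-mbuf (non-scattered) path: each completed
 * descriptor maps to exactly one mbuf. Descriptors are scanned until one
 * without the DD bit is found, and the ring is replenished with fresh mbufs
 * once the allocation trigger is crossed.
 */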
uint16_t
fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        uint16_t nb_pkts)
{
        struct rte_mbuf *mbuf;
        union fm10k_rx_desc desc;
        struct fm10k_rx_queue *q = rx_queue;
        uint16_t count = 0;
        int alloc = 0;
        uint16_t next_dd;
        int ret;

        next_dd = q->next_dd;

        nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
        for (count = 0; count < nb_pkts; ++count) {
                mbuf = q->sw_ring[next_dd];
                desc = q->hw_ring[next_dd];
                if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
                        break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
                dump_rxd(&desc);
#endif
                rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
                rte_pktmbuf_data_len(mbuf) = desc.w.length;

                mbuf->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
                rx_desc_to_ol_flags(mbuf, &desc);
#endif

                mbuf->hash.rss = desc.d.rss;
                /**
                 * Packets received on an fm10k device always carry at least
                 * one VLAN tag; packets that arrive untagged are assigned the
                 * port's default VLAN tag. The PKT_RX_VLAN_PKT flag is
                 * therefore always set, and vlan_tci is valid for every
                 * received mbuf.
                 */
                mbuf->ol_flags |= PKT_RX_VLAN_PKT;
                mbuf->vlan_tci = desc.w.vlan;

                rx_pkts[count] = mbuf;
                if (++next_dd == q->nb_desc) {
                        next_dd = 0;
                        alloc = 1;
                }

                /* Prefetch next mbuf while processing current one. */
                rte_prefetch0(q->sw_ring[next_dd]);

                /*
                 * When next RX descriptor is on a cache-line boundary,
                 * prefetch the next 4 RX descriptors and the next 8 pointers
                 * to mbufs.
                 */
                if ((next_dd & 0x3) == 0) {
                        rte_prefetch0(&q->hw_ring[next_dd]);
                        rte_prefetch0(&q->sw_ring[next_dd]);
                }
        }

        q->next_dd = next_dd;

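        /*
         * Replenish the ring once enough descriptors have been consumed:
         * bulk-allocate alloc_thresh mbufs, program their DMA addresses into
         * the hardware ring and bump the tail pointer. On allocation failure,
         * next_dd is rolled back and 0 is returned so the descriptors scanned
         * in this call are processed again on the next call.
         */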
        if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
                ret = rte_mempool_get_bulk(q->mp,
                                        (void **)&q->sw_ring[q->next_alloc],
                                        q->alloc_thresh);

                if (unlikely(ret != 0)) {
                        uint8_t port = q->port_id;
                        PMD_RX_LOG(ERR, "Failed to alloc mbuf");
                        /*
                         * Need to restore next_dd if we cannot allocate new
                         * buffers to replenish the old ones.
                         */
                        q->next_dd = (q->next_dd + q->nb_desc - count) %
                                                                q->nb_desc;
                        rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
                        return 0;
                }

                for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
                        mbuf = q->sw_ring[q->next_alloc];

                        /* setup static mbuf fields */
                        fm10k_pktmbuf_reset(mbuf, q->port_id);

                        /* write descriptor */
                        desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
                        desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
                        q->hw_ring[q->next_alloc] = desc;
                }
                FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
                q->next_trigger += q->alloc_thresh;
                if (q->next_trigger >= q->nb_desc) {
                        q->next_trigger = q->alloc_thresh - 1;
                        q->next_alloc = 0;
                }
        }

        return count;
}

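/*
 * Receive burst for the scattered path: packets larger than one mbuf are
 * reassembled by chaining consecutive descriptors' mbufs until a descriptor
 * with the EOP bit is seen. Partially reassembled packets are carried over
 * between calls in q->pkt_first_seg / q->pkt_last_seg.
 */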
uint16_t
fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                                uint16_t nb_pkts)
{
        struct rte_mbuf *mbuf;
        union fm10k_rx_desc desc;
        struct fm10k_rx_queue *q = rx_queue;
        uint16_t count = 0;
        uint16_t nb_rcv, nb_seg;
        int alloc = 0;
        uint16_t next_dd;
        struct rte_mbuf *first_seg = q->pkt_first_seg;
        struct rte_mbuf *last_seg = q->pkt_last_seg;
        int ret;

        next_dd = q->next_dd;
        nb_rcv = 0;

        nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
        for (count = 0; count < nb_seg; count++) {
                mbuf = q->sw_ring[next_dd];
                desc = q->hw_ring[next_dd];
                if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
                        break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
                dump_rxd(&desc);
#endif

                if (++next_dd == q->nb_desc) {
                        next_dd = 0;
                        alloc = 1;
                }

                /* Prefetch next mbuf while processing current one. */
                rte_prefetch0(q->sw_ring[next_dd]);

                /*
                 * When next RX descriptor is on a cache-line boundary,
                 * prefetch the next 4 RX descriptors and the next 8 pointers
                 * to mbufs.
                 */
                if ((next_dd & 0x3) == 0) {
                        rte_prefetch0(&q->hw_ring[next_dd]);
                        rte_prefetch0(&q->sw_ring[next_dd]);
                }

                /* Fill data length */
                rte_pktmbuf_data_len(mbuf) = desc.w.length;

                /*
                 * If this is the first buffer of the received packet,
                 * set the pointer to the first mbuf of the packet and
                 * initialize its context.
                 * Otherwise, update the total length and the number of
                 * segments of the current scattered packet, and update the
                 * pointer to the last mbuf of the current packet.
                 */
                if (!first_seg) {
                        first_seg = mbuf;
                        first_seg->pkt_len = desc.w.length;
                } else {
                        first_seg->pkt_len =
                                        (uint16_t)(first_seg->pkt_len +
                                        rte_pktmbuf_data_len(mbuf));
                        first_seg->nb_segs++;
                        last_seg->next = mbuf;
                }

                /*
                 * If this is not the last buffer of the received packet,
                 * update the pointer to the last mbuf of the current scattered
                 * packet and continue to parse the RX ring.
                 */
                if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
                        last_seg = mbuf;
                        continue;
                }

                first_seg->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
                rx_desc_to_ol_flags(first_seg, &desc);
#endif
                first_seg->hash.rss = desc.d.rss;
                /**
                 * Packets received on an fm10k device always carry at least
                 * one VLAN tag; packets that arrive untagged are assigned the
                 * port's default VLAN tag. The PKT_RX_VLAN_PKT flag is
                 * therefore always set, and vlan_tci is valid for every
                 * received mbuf. The flag is set on the first segment, since
                 * that is the mbuf handed back to the application.
                 */
                first_seg->ol_flags |= PKT_RX_VLAN_PKT;
                first_seg->vlan_tci = desc.w.vlan;

                /* Prefetch data of first segment, if configured to do so. */
                rte_packet_prefetch((char *)first_seg->buf_addr +
                        first_seg->data_off);

                /*
                 * Store the mbuf address into the next entry of the array
                 * of returned packets.
                 */
                rx_pkts[nb_rcv++] = first_seg;

                /*
                 * Setup receipt context for a new packet.
                 */
                first_seg = NULL;
        }

        q->next_dd = next_dd;

        if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
                ret = rte_mempool_get_bulk(q->mp,
                                        (void **)&q->sw_ring[q->next_alloc],
                                        q->alloc_thresh);

                if (unlikely(ret != 0)) {
                        uint8_t port = q->port_id;
                        PMD_RX_LOG(ERR, "Failed to alloc mbuf");
                        /*
                         * Need to restore next_dd if we cannot allocate new
                         * buffers to replenish the old ones.
                         */
                        q->next_dd = (q->next_dd + q->nb_desc - count) %
                                                                q->nb_desc;
                        rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
                        return 0;
                }

                for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
                        mbuf = q->sw_ring[q->next_alloc];

                        /* setup static mbuf fields */
                        fm10k_pktmbuf_reset(mbuf, q->port_id);

                        /* write descriptor */
                        desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
                        desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
                        q->hw_ring[q->next_alloc] = desc;
                }
                FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
                q->next_trigger += q->alloc_thresh;
                if (q->next_trigger >= q->nb_desc) {
                        q->next_trigger = q->alloc_thresh - 1;
                        q->next_alloc = 0;
                }
        }

        q->pkt_first_seg = first_seg;
        q->pkt_last_seg = last_seg;

        return nb_rcv;
}

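/*
 * Reclaim transmitted descriptors: if the descriptor recorded at the head of
 * the RS-bit tracker has its DONE flag set, free every mbuf from last_free up
 * to and including that descriptor (handling ring wrap-around) and credit the
 * freed slots back to nb_free.
 */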
static inline void tx_free_descriptors(struct fm10k_tx_queue *q)
{
        uint16_t next_rs, count = 0;

        next_rs = fifo_peek(&q->rs_tracker);
        if (!(q->hw_ring[next_rs].flags & FM10K_TXD_FLAG_DONE))
                return;

        /* the DONE flag is set on this descriptor so remove the ID
         * from the RS bit tracker and free the buffers */
        fifo_remove(&q->rs_tracker);

        /* wrap around? if so, free buffers from last_free up to but NOT
         * including nb_desc */
        if (q->last_free > next_rs) {
                count = q->nb_desc - q->last_free;
                while (q->last_free < q->nb_desc) {
                        rte_pktmbuf_free_seg(q->sw_ring[q->last_free]);
                        q->sw_ring[q->last_free] = NULL;
                        ++q->last_free;
                }
                q->last_free = 0;
        }

        /* adjust free descriptor count before the next loop */
        q->nb_free += count + (next_rs + 1 - q->last_free);

        /* free buffers from last_free, up to and including next_rs */
        while (q->last_free <= next_rs) {
                rte_pktmbuf_free_seg(q->sw_ring[q->last_free]);
                q->sw_ring[q->last_free] = NULL;
                ++q->last_free;
        }

        if (q->last_free == q->nb_desc)
                q->last_free = 0;
}

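/*
 * Post a single packet (one descriptor per segment) to the Tx ring: set the
 * checksum, VLAN and TSO fields on the first descriptor, chain the remaining
 * segments, and mark the last descriptor with the LAST flag (plus RS every
 * rs_thresh descriptors so completions can be tracked).
 */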
static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb)
{
        uint16_t last_id;
        uint8_t flags, hdrlen;

        /* always set the LAST flag on the last descriptor used to
         * transmit the packet */
        flags = FM10K_TXD_FLAG_LAST;
        last_id = q->next_free + mb->nb_segs - 1;
        if (last_id >= q->nb_desc)
                last_id = last_id - q->nb_desc;

        /* but only set the RS flag on the last descriptor if rs_thresh
         * descriptors will be used since the RS flag was last set */
        if ((q->nb_used + mb->nb_segs) >= q->rs_thresh) {
                flags |= FM10K_TXD_FLAG_RS;
                fifo_insert(&q->rs_tracker, last_id);
                q->nb_used = 0;
        } else {
                q->nb_used = q->nb_used + mb->nb_segs;
        }

        q->nb_free -= mb->nb_segs;

        q->hw_ring[q->next_free].flags = 0;
        /* set checksum flags on first descriptor of packet. SCTP checksum
         * offload is not supported, but we do not explicitly check for this
         * case in favor of greatly simplified processing. */
        if (mb->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
                q->hw_ring[q->next_free].flags |= FM10K_TXD_FLAG_CSUM;

        /* set vlan if requested */
        if (mb->ol_flags & PKT_TX_VLAN_PKT)
                q->hw_ring[q->next_free].vlan = mb->vlan_tci;

        q->sw_ring[q->next_free] = mb;
        q->hw_ring[q->next_free].buffer_addr =
                        rte_cpu_to_le_64(MBUF_DMA_ADDR(mb));
        q->hw_ring[q->next_free].buflen =
                        rte_cpu_to_le_16(rte_pktmbuf_data_len(mb));

        if (mb->ol_flags & PKT_TX_TCP_SEG) {
                hdrlen = mb->outer_l2_len + mb->outer_l3_len + mb->l2_len +
                        mb->l3_len + mb->l4_len;
                if (q->hw_ring[q->next_free].flags & FM10K_TXD_FLAG_FTAG)
                        hdrlen += sizeof(struct fm10k_ftag);

                if (likely((hdrlen >= FM10K_TSO_MIN_HEADERLEN) &&
                                (hdrlen <= FM10K_TSO_MAX_HEADERLEN) &&
                                (mb->tso_segsz >= FM10K_TSO_MINMSS))) {
                        q->hw_ring[q->next_free].mss = mb->tso_segsz;
                        q->hw_ring[q->next_free].hdrlen = hdrlen;
                }
        }

        if (++q->next_free == q->nb_desc)
                q->next_free = 0;

        /* fill up the rings */
        for (mb = mb->next; mb != NULL; mb = mb->next) {
                q->sw_ring[q->next_free] = mb;
                q->hw_ring[q->next_free].buffer_addr =
                                rte_cpu_to_le_64(MBUF_DMA_ADDR(mb));
                q->hw_ring[q->next_free].buflen =
                                rte_cpu_to_le_16(rte_pktmbuf_data_len(mb));
                q->hw_ring[q->next_free].flags = 0;
                if (++q->next_free == q->nb_desc)
                        q->next_free = 0;
        }

        q->hw_ring[last_id].flags |= flags;
}

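/*
 * Transmit burst: free completed descriptors when they run low, stop early
 * if a packet does not fit in the remaining descriptors or fails the basic
 * mbuf sanity check, and finally advance the hardware tail pointer once for
 * the whole burst.
 */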
uint16_t
fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
{
        struct fm10k_tx_queue *q = tx_queue;
        struct rte_mbuf *mb;
        uint16_t count;

        for (count = 0; count < nb_pkts; ++count) {
                mb = tx_pkts[count];

                /* running low on descriptors? try to free some... */
                if (q->nb_free < q->free_thresh)
                        tx_free_descriptors(q);

                /* make sure there are enough free descriptors to transmit the
                 * entire packet before doing anything */
                if (q->nb_free < mb->nb_segs)
                        break;

                /* sanity check to make sure the mbuf is valid */
                if ((mb->nb_segs == 0) ||
                    ((mb->nb_segs > 1) && (mb->next == NULL)))
                        break;

                /* process the packet */
                tx_xmit_pkt(q, mb);
        }

        /* update the tail pointer if any packets were processed */
        if (likely(count > 0))
                FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_free);

        return count;
}