net/enic: improve packet type identification
drivers/net/enic/enic_rxtx.c
/* Copyright 2008-2016 Cisco Systems, Inc.  All rights reserved.
 * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
 *
 * Copyright (c) 2014, Cisco Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>

#include "enic_compat.h"
#include "rq_enet_desc.h"
#include "enic.h"

#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/* Prefetch a cache line into all cache levels. */
#define rte_enic_prefetch(p) rte_prefetch0(p)
#else
#define rte_enic_prefetch(p) do {} while (0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p) rte_prefetch1(p)
#else
#define rte_packet_prefetch(p) do {} while (0)
#endif

static inline uint16_t
enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd)
{
        return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK;
}

static inline uint16_t
enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd)
{
        return le16_to_cpu(crd->bytes_written_flags) &
                           ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
}

static inline uint8_t
enic_cq_rx_desc_packet_error(uint16_t bwflags)
{
        return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ==
                CQ_ENET_RQ_DESC_FLAGS_TRUNCATED;
}

static inline uint8_t
enic_cq_rx_desc_eop(uint16_t ciflags)
{
        return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP)
                == CQ_ENET_RQ_DESC_FLAGS_EOP;
}

static inline uint8_t
enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd)
{
        return (le16_to_cpu(cqrd->q_number_rss_type_flags) &
                CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ==
                CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC;
}

static inline uint8_t
enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd)
{
        return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ==
                CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK;
}

static inline uint8_t
enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd)
{
        return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ==
                CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK;
}

static inline uint8_t
enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd)
{
        return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >>
                CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
}

static inline uint32_t
enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd)
{
        return le32_to_cpu(cqrd->rss_hash);
}

static inline uint16_t
enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd)
{
        return le16_to_cpu(cqrd->vlan);
}

static inline uint16_t
enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
{
        struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
        return le16_to_cpu(cqrd->bytes_written_flags) &
                CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
}

static inline uint8_t
enic_cq_rx_check_err(struct cq_desc *cqd)
{
        struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
        uint16_t bwflags;

        bwflags = enic_cq_rx_desc_bwflags(cqrd);
        if (unlikely(enic_cq_rx_desc_packet_error(bwflags)))
                return 1;
        return 0;
}

/* Translate RX CQ flags to the mbuf packet type via a lookup table. */
static inline uint32_t
enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd)
{
        struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
        uint8_t cqrd_flags = cqrd->flags;
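        /*
         * The table index is the CQ descriptor flags byte masked down to
         * the fragment, IPv4, IPv6, TCP and UDP indication bits (see the
         * mask applied below); indices not listed stay RTE_PTYPE_UNKNOWN.
         */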
        static const uint32_t cq_type_table[128] __rte_cache_aligned = {
                [0x00] = RTE_PTYPE_UNKNOWN,
                [0x20] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
                          | RTE_PTYPE_L4_NONFRAG,
                [0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
                          | RTE_PTYPE_L4_UDP,
                [0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
                          | RTE_PTYPE_L4_TCP,
                [0x60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
                          | RTE_PTYPE_L4_FRAG,
                [0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
                          | RTE_PTYPE_L4_UDP,
                [0x64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
                          | RTE_PTYPE_L4_TCP,
                [0x10] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
                          | RTE_PTYPE_L4_NONFRAG,
                [0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
                          | RTE_PTYPE_L4_UDP,
                [0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
                          | RTE_PTYPE_L4_TCP,
                [0x50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
                          | RTE_PTYPE_L4_FRAG,
                [0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
                          | RTE_PTYPE_L4_UDP,
                [0x54] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
                          | RTE_PTYPE_L4_TCP,
                /* All others reserved */
        };
        cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT
                | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6
                | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP;
        return cq_type_table[cqrd_flags];
}

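/*
 * Derive mbuf ol_flags (VLAN, RSS hash and checksum status) from the CQ
 * descriptor.  The flags are only meaningful on a descriptor carrying EOP.
 */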
static inline void
enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
{
        struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
        uint16_t ciflags, bwflags, pkt_flags = 0;
        ciflags = enic_cq_rx_desc_ciflags(cqrd);
        bwflags = enic_cq_rx_desc_bwflags(cqrd);

        mbuf->ol_flags = 0;

        /* flags are meaningless if !EOP */
        if (unlikely(!enic_cq_rx_desc_eop(ciflags)))
                goto mbuf_flags_done;

        /* VLAN stripping */
        if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
                pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
                mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd);
        } else {
                mbuf->vlan_tci = 0;
        }

        /* RSS flag */
        if (enic_cq_rx_desc_rss_type(cqrd)) {
                pkt_flags |= PKT_RX_RSS_HASH;
                mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd);
        }

        /* checksum flags */
        if (!enic_cq_rx_desc_csum_not_calc(cqrd) &&
                (mbuf->packet_type & RTE_PTYPE_L3_IPV4)) {
                if (unlikely(!enic_cq_rx_desc_ipv4_csum_ok(cqrd)))
                        pkt_flags |= PKT_RX_IP_CKSUM_BAD;
                if (mbuf->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) {
                        if (unlikely(!enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)))
                                pkt_flags |= PKT_RX_L4_CKSUM_BAD;
                }
        }

 mbuf_flags_done:
        mbuf->ol_flags = pkt_flags;
}

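/*
 * Receive burst: consume completion queue entries until a descriptor's
 * color bit matches cq->last_color (i.e. the NIC has not written it yet),
 * replacing each received mbuf in the ring with a freshly allocated one,
 * and post the replacement buffers to the NIC once more than
 * rx_free_thresh of them are being held back.
 */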
uint16_t
enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
               uint16_t nb_pkts)
{
        struct vnic_rq *rq = rx_queue;
        struct enic *enic = vnic_dev_priv(rq->vdev);
        unsigned int rx_id;
        struct rte_mbuf *nmb, *rxmb;
        uint16_t nb_rx = 0, nb_err = 0;
        uint16_t nb_hold;
        struct vnic_cq *cq;
        volatile struct cq_desc *cqd_ptr;
        uint8_t color;

        cq = &enic->cq[enic_cq_rq(enic, rq->index)];
        rx_id = cq->to_clean;           /* index of cqd, rqd, mbuf_table */
        cqd_ptr = (struct cq_desc *)(cq->ring.descs) + rx_id;

        nb_hold = rq->rx_nb_hold;       /* mbufs held by software */

        while (nb_rx < nb_pkts) {
                volatile struct rq_enet_desc *rqd_ptr;
                dma_addr_t dma_addr;
                struct cq_desc cqd;
                uint8_t packet_error;

                /* Check for pkts available */
                color = (cqd_ptr->type_color >> CQ_DESC_COLOR_SHIFT)
                        & CQ_DESC_COLOR_MASK;
                if (color == cq->last_color)
                        break;

                /* Get the cq descriptor and rq pointer */
                cqd = *cqd_ptr;
                rqd_ptr = (struct rq_enet_desc *)(rq->ring.descs) + rx_id;

                /* allocate a new mbuf */
                nmb = rte_mbuf_raw_alloc(rq->mp);
                if (nmb == NULL) {
                        rte_atomic64_inc(&enic->soft_stats.rx_nombuf);
                        break;
                }

                /* A packet error means descriptor and data are untrusted */
                packet_error = enic_cq_rx_check_err(&cqd);

                /* Get the mbuf to return and replace with one just allocated */
                rxmb = rq->mbuf_ring[rx_id];
                rq->mbuf_ring[rx_id] = nmb;

                /* Increment cqd, rqd, mbuf_table index */
                rx_id++;
                if (unlikely(rx_id == rq->ring.desc_count)) {
                        rx_id = 0;
                        cq->last_color = cq->last_color ? 0 : 1;
                }

                /* Prefetch next mbuf & desc while processing current one */
                cqd_ptr = (struct cq_desc *)(cq->ring.descs) + rx_id;
                rte_enic_prefetch(cqd_ptr);
                rte_enic_prefetch(rq->mbuf_ring[rx_id]);
                rte_enic_prefetch((struct rq_enet_desc *)(rq->ring.descs)
                                 + rx_id);

                /* Push descriptor for newly allocated mbuf */
                dma_addr = (dma_addr_t)(nmb->buf_physaddr
                           + RTE_PKTMBUF_HEADROOM);
                rqd_ptr->address = rte_cpu_to_le_64(dma_addr);
                rqd_ptr->length_type = cpu_to_le16(nmb->buf_len
                                       - RTE_PKTMBUF_HEADROOM);

                /* Drop incoming bad packet */
                if (unlikely(packet_error)) {
                        rte_pktmbuf_free(rxmb);
                        nb_err++;
                        continue;
                }

                /* Fill in the rest of the mbuf */
                rxmb->data_off = RTE_PKTMBUF_HEADROOM;
                rxmb->nb_segs = 1;
                rxmb->next = NULL;
                rxmb->port = enic->port_id;
                rxmb->pkt_len = enic_cq_rx_desc_n_bytes(&cqd);
                rxmb->packet_type = enic_cq_rx_flags_to_pkt_type(&cqd);
                enic_cq_rx_to_pkt_flags(&cqd, rxmb);
                rxmb->data_len = rxmb->pkt_len;

                /* prefetch mbuf data for caller */
                rte_packet_prefetch(RTE_PTR_ADD(rxmb->buf_addr,
                                    RTE_PKTMBUF_HEADROOM));

                /* store the mbuf address into the next entry of the array */
                rx_pkts[nb_rx++] = rxmb;
        }

        nb_hold += nb_rx + nb_err;
        cq->to_clean = rx_id;

        if (nb_hold > rq->rx_free_thresh) {
                rq->posted_index = enic_ring_add(rq->ring.desc_count,
                                rq->posted_index, nb_hold);
                nb_hold = 0;
                rte_mb();
                iowrite32(rq->posted_index, &rq->ctrl->posted_index);
        }

        rq->rx_nb_hold = nb_hold;

        return nb_rx;
}

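/*
 * Free transmit mbufs whose descriptors the NIC has completed, up to and
 * including completed_index.  Consecutive mbufs from the same mempool are
 * returned with a single rte_mempool_put_bulk() call.
 */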
static inline void enic_free_wq_bufs(struct vnic_wq *wq, u16 completed_index)
{
        struct vnic_wq_buf *buf;
        struct rte_mbuf *m, *free[ENIC_MAX_WQ_DESCS];
        unsigned int nb_to_free, nb_free = 0, i;
        struct rte_mempool *pool;
        unsigned int tail_idx;
        unsigned int desc_count = wq->ring.desc_count;

        nb_to_free = enic_ring_sub(desc_count, wq->tail_idx, completed_index)
                                   + 1;
        tail_idx = wq->tail_idx;
        buf = &wq->bufs[tail_idx];
        pool = ((struct rte_mbuf *)buf->mb)->pool;
        for (i = 0; i < nb_to_free; i++) {
                buf = &wq->bufs[tail_idx];
                m = (struct rte_mbuf *)(buf->mb);
                if (likely(m->pool == pool)) {
                        ENIC_ASSERT(nb_free < ENIC_MAX_WQ_DESCS);
                        free[nb_free++] = m;
                } else {
                        rte_mempool_put_bulk(pool, (void *)free, nb_free);
                        free[0] = m;
                        nb_free = 1;
                        pool = m->pool;
                }
                tail_idx = enic_ring_incr(desc_count, tail_idx);
                buf->mb = NULL;
        }

        rte_mempool_put_bulk(pool, (void **)free, nb_free);

        wq->tail_idx = tail_idx;
        wq->ring.desc_avail += nb_to_free;
}

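/*
 * Read the completed index that the NIC writes into the CQ message area
 * and, if it has advanced, release the corresponding transmit buffers.
 */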
unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq)
{
        u16 completed_index;

        completed_index = *((uint32_t *)wq->cqmsg_rz->addr) & 0xffff;

        if (wq->last_completed_index != completed_index) {
                enic_free_wq_bufs(wq, completed_index);
                wq->last_completed_index = completed_index;
        }
        return 0;
}

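/*
 * Transmit burst: reclaim completed descriptors, then encode one WQ
 * descriptor per mbuf segment (VLAN insertion and checksum offload taken
 * from ol_flags), stop early if descriptors run out, and finally post the
 * new head index to the NIC.
 */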
uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
{
        uint16_t index;
        unsigned int pkt_len, data_len;
        unsigned int nb_segs;
        struct rte_mbuf *tx_pkt;
        struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
        struct enic *enic = vnic_dev_priv(wq->vdev);
        unsigned short vlan_id;
        uint64_t ol_flags;
        uint64_t ol_flags_mask;
        unsigned int wq_desc_avail;
        int head_idx;
        struct vnic_wq_buf *buf;
        unsigned int desc_count;
        struct wq_enet_desc *descs, *desc_p, desc_tmp;
        uint16_t mss;
        uint8_t vlan_tag_insert;
        uint8_t eop;
        uint64_t bus_addr;

        enic_cleanup_wq(enic, wq);
        wq_desc_avail = vnic_wq_desc_avail(wq);
        head_idx = wq->head_idx;
        desc_count = wq->ring.desc_count;
        ol_flags_mask = PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK;

        nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);

        for (index = 0; index < nb_pkts; index++) {
                tx_pkt = *tx_pkts++;
                nb_segs = tx_pkt->nb_segs;
                if (nb_segs > wq_desc_avail) {
                        if (index > 0)
                                goto post;
                        goto done;
                }

                pkt_len = tx_pkt->pkt_len;
                data_len = tx_pkt->data_len;
                ol_flags = tx_pkt->ol_flags;
                mss = 0;
                vlan_id = 0;
                vlan_tag_insert = 0;
                bus_addr = (dma_addr_t)
                           (tx_pkt->buf_physaddr + tx_pkt->data_off);

                descs = (struct wq_enet_desc *)wq->ring.descs;
                desc_p = descs + head_idx;

                eop = (data_len == pkt_len);

                if (ol_flags & ol_flags_mask) {
                        if (ol_flags & PKT_TX_VLAN_PKT) {
                                vlan_tag_insert = 1;
                                vlan_id = tx_pkt->vlan_tci;
                        }

                        if (ol_flags & PKT_TX_IP_CKSUM)
                                mss |= ENIC_CALC_IP_CKSUM;

                        /* The NIC has a single offload bit covering both TCP and UDP checksums */
                        switch (ol_flags & PKT_TX_L4_MASK) {
                        case PKT_TX_TCP_CKSUM:
                        case PKT_TX_UDP_CKSUM:
                                mss |= ENIC_CALC_TCP_UDP_CKSUM;
                                break;
                        }
                }

                wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop,
                                 eop, 0, vlan_tag_insert, vlan_id, 0);

                *desc_p = desc_tmp;
                buf = &wq->bufs[head_idx];
                buf->mb = (void *)tx_pkt;
                head_idx = enic_ring_incr(desc_count, head_idx);
                wq_desc_avail--;

                if (!eop) {
                        for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
                            tx_pkt->next) {
                                data_len = tx_pkt->data_len;

                                if (tx_pkt->next == NULL)
                                        eop = 1;
                                desc_p = descs + head_idx;
                                bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr
                                           + tx_pkt->data_off);
                                wq_enet_desc_enc((struct wq_enet_desc *)
                                                 &desc_tmp, bus_addr, data_len,
                                                 mss, 0, 0, eop, eop, 0,
                                                 vlan_tag_insert, vlan_id, 0);

                                *desc_p = desc_tmp;
                                buf = &wq->bufs[head_idx];
                                buf->mb = (void *)tx_pkt;
                                head_idx = enic_ring_incr(desc_count, head_idx);
                                wq_desc_avail--;
                        }
                }
        }
 post:
        rte_wmb();
        iowrite32(head_idx, &wq->ctrl->posted_index);
 done:
        wq->ring.desc_avail = wq_desc_avail;
        wq->head_idx = head_idx;

        return index;
}