e4f43c5ef8aad73361cd4fd705db6e0e8988d2b5
[dpdk.git] / lib / librte_pmd_enic / enic_main.c
1 /*
2  * Copyright 2008-2014 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  *
5  * Copyright (c) 2014, Cisco Systems, Inc.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  */
34 #ident "$Id$"
35
36 #include <stdio.h>
37
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <fcntl.h>
41 #include <libgen.h>
42
43 #include <rte_pci.h>
44 #include <rte_memzone.h>
45 #include <rte_malloc.h>
46 #include <rte_mbuf.h>
47 #include <rte_string_fns.h>
48 #include <rte_ethdev.h>
49
50 #include "enic_compat.h"
51 #include "enic.h"
52 #include "wq_enet_desc.h"
53 #include "rq_enet_desc.h"
54 #include "cq_enet_desc.h"
55 #include "vnic_enet.h"
56 #include "vnic_dev.h"
57 #include "vnic_wq.h"
58 #include "vnic_rq.h"
59 #include "vnic_cq.h"
60 #include "vnic_intr.h"
61 #include "vnic_nic.h"
62
63 static inline int enic_is_sriov_vf(struct enic *enic)
64 {
65         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
66 }
67
/*
 * Return 1 when all six octets of the Ethernet address are zero,
 * 0 otherwise.
 */
static int is_zero_addr(uint8_t *addr)
{
        uint8_t any_bits = 0;
        unsigned int octet;

        /* OR every octet together; the result is zero only for 00:00:00:00:00:00 */
        for (octet = 0; octet < 6; octet++)
                any_bits |= addr[octet];

        return any_bits == 0;
}
72
/*
 * Return the group (multicast) bit of an Ethernet address, i.e. the
 * least significant bit of the first octet: 1 when set, 0 when clear.
 */
static int is_mcast_addr(uint8_t *addr)
{
        const uint8_t first_octet = addr[0];

        return first_octet & 0x01;
}
77
/*
 * An address is accepted when it is neither a multicast address nor
 * the all-zero address.  Returns 1 for an acceptable address, else 0.
 */
static int is_eth_addr_valid(uint8_t *addr)
{
        if (is_mcast_addr(addr))
                return 0;

        if (is_zero_addr(addr))
                return 0;

        return 1;
}
82
/*
 * Take one raw mbuf from the given mempool, run the raw sanity check
 * on it and hand it back.  Callers in this file test the result
 * against NULL before using it.
 */
static inline struct rte_mbuf *
enic_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *mbuf = __rte_mbuf_raw_alloc(mp);

        __rte_mbuf_sanity_check_raw(mbuf, 0);

        return mbuf;
}
92
93 void enic_set_hdr_split_size(struct enic *enic, u16 split_hdr_size)
94 {
95         vnic_set_hdr_split_size(enic->vdev, split_hdr_size);
96 }
97
98 static void enic_free_wq_buf(__rte_unused struct vnic_wq *wq, struct vnic_wq_buf *buf)
99 {
100         struct rte_mbuf *mbuf = (struct rte_mbuf *)buf->os_buf;
101
102         rte_mempool_put(mbuf->pool, mbuf);
103         buf->os_buf = NULL;
104 }
105
106 static void enic_wq_free_buf(struct vnic_wq *wq,
107         __rte_unused struct cq_desc *cq_desc,
108         struct vnic_wq_buf *buf,
109         __rte_unused void *opaque)
110 {
111         enic_free_wq_buf(wq, buf);
112 }
113
114 static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
115         __rte_unused u8 type, u16 q_number, u16 completed_index, void *opaque)
116 {
117         struct enic *enic = vnic_dev_priv(vdev);
118
119         vnic_wq_service(&enic->wq[q_number], cq_desc,
120                 completed_index, enic_wq_free_buf,
121                 opaque);
122
123         return 0;
124 }
125
126 static void enic_log_q_error(struct enic *enic)
127 {
128         unsigned int i;
129         u32 error_status;
130
131         for (i = 0; i < enic->wq_count; i++) {
132                 error_status = vnic_wq_error_status(&enic->wq[i]);
133                 if (error_status)
134                         dev_err(enic, "WQ[%d] error_status %d\n", i,
135                                 error_status);
136         }
137
138         for (i = 0; i < enic->rq_count; i++) {
139                 error_status = vnic_rq_error_status(&enic->rq[i]);
140                 if (error_status)
141                         dev_err(enic, "RQ[%d] error_status %d\n", i,
142                                 error_status);
143         }
144 }
145
146 unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq)
147 {
148         unsigned int cq = enic_cq_wq(enic, wq->index);
149
150         /* Return the work done */
151         return vnic_cq_service(&enic->cq[cq],
152                 -1 /*wq_work_to_do*/, enic_wq_service, NULL);
153 }
154
155
156 int enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
157         struct rte_mbuf *tx_pkt, unsigned short len,
158         uint8_t sop, uint8_t eop,
159         uint16_t ol_flags, uint16_t vlan_tag)
160 {
161         struct wq_enet_desc *desc = vnic_wq_next_desc(wq);
162         uint16_t mss = 0;
163         uint8_t cq_entry = eop;
164         uint8_t vlan_tag_insert = 0;
165         uint64_t bus_addr = (dma_addr_t)
166             (tx_pkt->buf_physaddr + RTE_PKTMBUF_HEADROOM);
167
168         if (sop) {
169                 if (ol_flags & PKT_TX_VLAN_PKT)
170                         vlan_tag_insert = 1;
171
172                 if (enic->hw_ip_checksum) {
173                         if (ol_flags & PKT_TX_IP_CKSUM)
174                                 mss |= ENIC_CALC_IP_CKSUM;
175
176                         if (ol_flags & PKT_TX_TCP_UDP_CKSUM)
177                                 mss |= ENIC_CALC_TCP_UDP_CKSUM;
178                 }
179         }
180
181         wq_enet_desc_enc(desc,
182                 bus_addr,
183                 len,
184                 mss,
185                 0 /* header_length */,
186                 0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */,
187                 eop,
188                 cq_entry,
189                 0 /* fcoe_encap */,
190                 vlan_tag_insert,
191                 vlan_tag,
192                 0 /* loopback */);
193
194         vnic_wq_post(wq, (void *)tx_pkt, bus_addr, len,
195                 sop, eop,
196                 1 /*desc_skip_cnt*/,
197                 cq_entry,
198                 0 /*compressed send*/,
199                 0 /*wrid*/);
200
201         return 0;
202 }
203
204 void enic_dev_stats_clear(struct enic *enic)
205 {
206         if (vnic_dev_stats_clear(enic->vdev))
207                 dev_err(enic, "Error in clearing stats\n");
208 }
209
210 void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
211 {
212         struct vnic_stats *stats;
213
214         memset(r_stats, 0, sizeof(*r_stats));
215         if (vnic_dev_stats_dump(enic->vdev, &stats)) {
216                 dev_err(enic, "Error in getting stats\n");
217                 return;
218         }
219
220         r_stats->ipackets = stats->rx.rx_frames_ok;
221         r_stats->opackets = stats->tx.tx_frames_ok;
222
223         r_stats->ibytes = stats->rx.rx_bytes_ok;
224         r_stats->obytes = stats->tx.tx_bytes_ok;
225
226         r_stats->ierrors = stats->rx.rx_errors;
227         r_stats->oerrors = stats->tx.tx_errors;
228
229         r_stats->imcasts = stats->rx.rx_multicast_frames_ok;
230         r_stats->rx_nombuf = stats->rx.rx_no_bufs;
231 }
232
233 void enic_del_mac_address(struct enic *enic)
234 {
235         if (vnic_dev_del_addr(enic->vdev, enic->mac_addr))
236                 dev_err(enic, "del mac addr failed\n");
237 }
238
239 void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
240 {
241         int err;
242
243         if (!is_eth_addr_valid(mac_addr)) {
244                 dev_err(enic, "invalid mac address\n");
245                 return;
246         }
247
248         err = vnic_dev_del_addr(enic->vdev, mac_addr);
249         if (err) {
250                 dev_err(enic, "del mac addr failed\n");
251                 return;
252         }
253
254         ether_addr_copy((struct ether_addr *)mac_addr,
255                 (struct ether_addr *)enic->mac_addr);
256
257         err = vnic_dev_add_addr(enic->vdev, mac_addr);
258         if (err) {
259                 dev_err(enic, "add mac addr failed\n");
260                 return;
261         }
262 }
263
264 static void
265 enic_free_rq_buf(__rte_unused struct vnic_rq *rq, struct vnic_rq_buf *buf)
266 {
267         if (!buf->os_buf)
268                 return;
269
270         rte_pktmbuf_free((struct rte_mbuf *)buf->os_buf);
271         buf->os_buf = NULL;
272 }
273
274 void enic_init_vnic_resources(struct enic *enic)
275 {
276         unsigned int error_interrupt_enable = 1;
277         unsigned int error_interrupt_offset = 0;
278         unsigned int index = 0;
279
280         for (index = 0; index < enic->rq_count; index++) {
281                 vnic_rq_init(&enic->rq[index],
282                         enic_cq_rq(enic, index),
283                         error_interrupt_enable,
284                         error_interrupt_offset);
285         }
286
287         for (index = 0; index < enic->wq_count; index++) {
288                 vnic_wq_init(&enic->wq[index],
289                         enic_cq_wq(enic, index),
290                         error_interrupt_enable,
291                         error_interrupt_offset);
292         }
293
294         vnic_dev_stats_clear(enic->vdev);
295
296         for (index = 0; index < enic->cq_count; index++) {
297                 vnic_cq_init(&enic->cq[index],
298                         0 /* flow_control_enable */,
299                         1 /* color_enable */,
300                         0 /* cq_head */,
301                         0 /* cq_tail */,
302                         1 /* cq_tail_color */,
303                         0 /* interrupt_enable */,
304                         1 /* cq_entry_enable */,
305                         0 /* cq_message_enable */,
306                         0 /* interrupt offset */,
307                         0 /* cq_message_addr */);
308         }
309
310         vnic_intr_init(&enic->intr,
311                 enic->config.intr_timer_usec,
312                 enic->config.intr_timer_type,
313                 /*mask_on_assertion*/1);
314 }
315
316
317 static int enic_rq_alloc_buf(struct vnic_rq *rq)
318 {
319         struct enic *enic = vnic_dev_priv(rq->vdev);
320         dma_addr_t dma_addr;
321         struct rq_enet_desc *desc = vnic_rq_next_desc(rq);
322         uint8_t type = RQ_ENET_TYPE_ONLY_SOP;
323         u16 split_hdr_size = vnic_get_hdr_split_size(enic->vdev);
324         struct rte_mbuf *mbuf = enic_rxmbuf_alloc(rq->mp);
325         struct rte_mbuf *hdr_mbuf = NULL;
326
327         if (!mbuf) {
328                 dev_err(enic, "mbuf alloc in enic_rq_alloc_buf failed\n");
329                 return -1;
330         }
331
332         if (unlikely(split_hdr_size)) {
333                 if (vnic_rq_desc_avail(rq) < 2) {
334                         rte_mempool_put(mbuf->pool, mbuf);
335                         return -1;
336                 }
337                 hdr_mbuf = enic_rxmbuf_alloc(rq->mp);
338                 if (!hdr_mbuf) {
339                         rte_mempool_put(mbuf->pool, mbuf);
340                         dev_err(enic,
341                                 "hdr_mbuf alloc in enic_rq_alloc_buf failed\n");
342                         return -1;
343                 }
344
345                 hdr_mbuf->data_off = RTE_PKTMBUF_HEADROOM;
346
347                 hdr_mbuf->nb_segs = 2;
348                 hdr_mbuf->port = rq->index;
349                 hdr_mbuf->next = mbuf;
350
351                 dma_addr = (dma_addr_t)
352                     (hdr_mbuf->buf_physaddr + hdr_mbuf->data_off);
353
354                 rq_enet_desc_enc(desc, dma_addr, type, split_hdr_size);
355
356                 vnic_rq_post(rq, (void *)hdr_mbuf, 0 /*os_buf_index*/, dma_addr,
357                         (unsigned int)split_hdr_size, 0 /*wrid*/);
358
359                 desc = vnic_rq_next_desc(rq);
360                 type = RQ_ENET_TYPE_NOT_SOP;
361         } else {
362                 mbuf->nb_segs = 1;
363                 mbuf->port = rq->index;
364         }
365
366         mbuf->data_off = RTE_PKTMBUF_HEADROOM;
367         mbuf->next = NULL;
368
369         dma_addr = (dma_addr_t)
370             (mbuf->buf_physaddr + mbuf->data_off);
371
372         rq_enet_desc_enc(desc, dma_addr, type, mbuf->buf_len);
373
374         vnic_rq_post(rq, (void *)mbuf, 0 /*os_buf_index*/, dma_addr,
375                 (unsigned int)mbuf->buf_len, 0 /*wrid*/);
376
377         return 0;
378 }
379
380 static int enic_rq_indicate_buf(struct vnic_rq *rq,
381         struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
382         int skipped, void *opaque)
383 {
384         struct enic *enic = vnic_dev_priv(rq->vdev);
385         struct rte_mbuf **rx_pkt_bucket = (struct rte_mbuf **)opaque;
386         struct rte_mbuf *rx_pkt = NULL;
387         struct rte_mbuf *hdr_rx_pkt = NULL;
388
389         u8 type, color, eop, sop, ingress_port, vlan_stripped;
390         u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
391         u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
392         u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
393         u8 packet_error;
394         u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
395         u32 rss_hash;
396
397         cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
398                 &type, &color, &q_number, &completed_index,
399                 &ingress_port, &fcoe, &eop, &sop, &rss_type,
400                 &csum_not_calc, &rss_hash, &bytes_written,
401                 &packet_error, &vlan_stripped, &vlan_tci, &checksum,
402                 &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error,
403                 &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp,
404                 &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
405                 &fcs_ok);
406
407         rx_pkt = (struct rte_mbuf *)buf->os_buf;
408         buf->os_buf = NULL;
409
410         if (unlikely(packet_error)) {
411                 dev_err(enic, "packet error\n");
412                 rx_pkt->data_len = 0;
413                 return 0;
414         }
415
416         if (unlikely(skipped)) {
417                 rx_pkt->data_len = 0;
418                 return 0;
419         }
420
421         if (likely(!vnic_get_hdr_split_size(enic->vdev))) {
422                 /* No header split configured */
423                 *rx_pkt_bucket = rx_pkt;
424                 rx_pkt->pkt_len = bytes_written;
425
426                 if (ipv4) {
427                         rx_pkt->ol_flags |= PKT_RX_IPV4_HDR;
428                         if (!csum_not_calc) {
429                                 if (unlikely(!ipv4_csum_ok))
430                                         rx_pkt->ol_flags |= PKT_RX_IP_CKSUM_BAD;
431
432                                 if ((tcp || udp) && (!tcp_udp_csum_ok))
433                                         rx_pkt->ol_flags |= PKT_RX_L4_CKSUM_BAD;
434                         }
435                 } else if (ipv6)
436                         rx_pkt->ol_flags |= PKT_RX_IPV6_HDR;
437         } else {
438                 /* Header split */
439                 if (sop && !eop) {
440                         /* This piece is header */
441                         *rx_pkt_bucket = rx_pkt;
442                         rx_pkt->pkt_len = bytes_written;
443                 } else {
444                         if (sop && eop) {
445                                 /* The packet is smaller than split_hdr_size */
446                                 *rx_pkt_bucket = rx_pkt;
447                                 rx_pkt->pkt_len = bytes_written;
448                                 if (ipv4) {
449                                         rx_pkt->ol_flags |= PKT_RX_IPV4_HDR;
450                                         if (!csum_not_calc) {
451                                                 if (unlikely(!ipv4_csum_ok))
452                                                         rx_pkt->ol_flags |=
453                                                             PKT_RX_IP_CKSUM_BAD;
454
455                                                 if ((tcp || udp) &&
456                                                     (!tcp_udp_csum_ok))
457                                                         rx_pkt->ol_flags |=
458                                                             PKT_RX_L4_CKSUM_BAD;
459                                         }
460                                 } else if (ipv6)
461                                         rx_pkt->ol_flags |= PKT_RX_IPV6_HDR;
462                         } else {
463                                 /* Payload */
464                                 hdr_rx_pkt = *rx_pkt_bucket;
465                                 hdr_rx_pkt->pkt_len += bytes_written;
466                                 if (ipv4) {
467                                         hdr_rx_pkt->ol_flags |= PKT_RX_IPV4_HDR;
468                                         if (!csum_not_calc) {
469                                                 if (unlikely(!ipv4_csum_ok))
470                                                         hdr_rx_pkt->ol_flags |=
471                                                             PKT_RX_IP_CKSUM_BAD;
472
473                                                 if ((tcp || udp) &&
474                                                     (!tcp_udp_csum_ok))
475                                                         hdr_rx_pkt->ol_flags |=
476                                                             PKT_RX_L4_CKSUM_BAD;
477                                         }
478                                 } else if (ipv6)
479                                         hdr_rx_pkt->ol_flags |= PKT_RX_IPV6_HDR;
480
481                         }
482                 }
483         }
484
485         rx_pkt->data_len = bytes_written;
486
487         if (rss_hash) {
488                 rx_pkt->ol_flags |= PKT_RX_RSS_HASH;
489                 rx_pkt->hash.rss = rss_hash;
490         }
491
492         if (vlan_tci) {
493                 rx_pkt->ol_flags |= PKT_RX_VLAN_PKT;
494                 rx_pkt->vlan_tci = vlan_tci;
495         }
496
497         return eop;
498 }
499
500 static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
501         __rte_unused u8 type, u16 q_number, u16 completed_index, void *opaque)
502 {
503         struct enic *enic = vnic_dev_priv(vdev);
504
505         return vnic_rq_service(&enic->rq[q_number], cq_desc,
506                 completed_index, VNIC_RQ_RETURN_DESC,
507                 enic_rq_indicate_buf, opaque);
508
509 }
510
511 int enic_poll(struct vnic_rq *rq, struct rte_mbuf **rx_pkts,
512         unsigned int budget, unsigned int *work_done)
513 {
514         struct enic *enic = vnic_dev_priv(rq->vdev);
515         unsigned int cq = enic_cq_rq(enic, rq->index);
516         int err = 0;
517
518         *work_done = vnic_cq_service(&enic->cq[cq],
519                 budget, enic_rq_service, (void *)rx_pkts);
520
521         if (*work_done) {
522                 vnic_rq_fill(rq, enic_rq_alloc_buf);
523
524                 /* Need at least one buffer on ring to get going */
525                 if (vnic_rq_desc_used(rq) == 0) {
526                         dev_err(enic, "Unable to alloc receive buffers\n");
527                         err = -1;
528                 }
529         }
530         return err;
531 }
532
533 static void *
534 enic_alloc_consistent(__rte_unused void *priv, size_t size,
535         dma_addr_t *dma_handle, u8 *name)
536 {
537         void *vaddr;
538         const struct rte_memzone *rz;
539         *dma_handle = 0;
540
541         rz = rte_memzone_reserve_aligned((const char *)name,
542                 size, 0, 0, ENIC_ALIGN);
543         if (!rz) {
544                 pr_err("%s : Failed to allocate memory requested for %s",
545                         __func__, name);
546                 return NULL;
547         }
548
549         vaddr = rz->addr;
550         *dma_handle = (dma_addr_t)rz->phys_addr;
551
552         return vaddr;
553 }
554
555 static void
556 enic_free_consistent(__rte_unused struct rte_pci_device *hwdev,
557         __rte_unused size_t size,
558         __rte_unused void *vaddr,
559         __rte_unused dma_addr_t dma_handle)
560 {
561         /* Nothing to be done */
562 }
563
564 static void
565 enic_intr_handler(__rte_unused struct rte_intr_handle *handle,
566         void *arg)
567 {
568         struct enic *enic = pmd_priv((struct rte_eth_dev *)arg);
569
570         dev_err(enic, "Err intr.\n");
571         vnic_intr_return_all_credits(&enic->intr);
572
573         enic_log_q_error(enic);
574 }
575
576 int enic_enable(struct enic *enic)
577 {
578         unsigned int index;
579         struct rte_eth_dev *eth_dev = enic->rte_dev;
580
581         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
582         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
583         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
584
585         if (enic_clsf_init(enic))
586                 dev_warning(enic, "Init of hash table for clsf failed."\
587                         "Flow director feature will not work\n");
588
589         /* Fill RQ bufs */
590         for (index = 0; index < enic->rq_count; index++) {
591                 vnic_rq_fill(&enic->rq[index], enic_rq_alloc_buf);
592
593                 /* Need at least one buffer on ring to get going
594                 */
595                 if (vnic_rq_desc_used(&enic->rq[index]) == 0) {
596                         dev_err(enic, "Unable to alloc receive buffers\n");
597                         return -1;
598                 }
599         }
600
601         for (index = 0; index < enic->wq_count; index++)
602                 vnic_wq_enable(&enic->wq[index]);
603         for (index = 0; index < enic->rq_count; index++)
604                 vnic_rq_enable(&enic->rq[index]);
605
606         vnic_dev_enable_wait(enic->vdev);
607
608 #ifndef VFIO_PRESENT
609         /* Register and enable error interrupt */
610         rte_intr_callback_register(&(enic->pdev->intr_handle),
611                 enic_intr_handler, (void *)enic->rte_dev);
612
613         rte_intr_enable(&(enic->pdev->intr_handle));
614 #endif
615         vnic_intr_unmask(&enic->intr);
616
617         return 0;
618 }
619
620 int enic_alloc_intr_resources(struct enic *enic)
621 {
622         int err;
623
624         dev_info(enic, "vNIC resources used:  "\
625                 "wq %d rq %d cq %d intr %d\n",
626                 enic->wq_count, enic->rq_count,
627                 enic->cq_count, enic->intr_count);
628
629         err = vnic_intr_alloc(enic->vdev, &enic->intr, 0);
630         if (err)
631                 enic_free_vnic_resources(enic);
632
633         return err;
634 }
635
636 void enic_free_rq(void *rxq)
637 {
638         struct vnic_rq *rq = (struct vnic_rq *)rxq;
639         struct enic *enic = vnic_dev_priv(rq->vdev);
640
641         vnic_rq_free(rq);
642         vnic_cq_free(&enic->cq[rq->index]);
643 }
644
645 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
646 {
647         vnic_wq_enable(&enic->wq[queue_idx]);
648 }
649
650 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
651 {
652         return vnic_wq_disable(&enic->wq[queue_idx]);
653 }
654
655 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
656 {
657         vnic_rq_enable(&enic->rq[queue_idx]);
658 }
659
660 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
661 {
662         return vnic_rq_disable(&enic->rq[queue_idx]);
663 }
664
665 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
666         unsigned int socket_id, struct rte_mempool *mp,
667         uint16_t nb_desc)
668 {
669         int err;
670         struct vnic_rq *rq = &enic->rq[queue_idx];
671
672         rq->socket_id = socket_id;
673         rq->mp = mp;
674
675         if (nb_desc) {
676                 if (nb_desc > enic->config.rq_desc_count) {
677                         dev_warning(enic,
678                                 "RQ %d - number of rx desc in cmd line (%d)"\
679                                 "is greater than that in the UCSM/CIMC adapter"\
680                                 "policy.  Applying the value in the adapter "\
681                                 "policy (%d).\n",
682                                 queue_idx, nb_desc, enic->config.rq_desc_count);
683                 } else if (nb_desc != enic->config.rq_desc_count) {
684                         enic->config.rq_desc_count = nb_desc;
685                         dev_info(enic,
686                                 "RX Queues - effective number of descs:%d\n",
687                                 nb_desc);
688                 }
689         }
690
691         /* Allocate queue resources */
692         err = vnic_rq_alloc(enic->vdev, &enic->rq[queue_idx], queue_idx,
693                 enic->config.rq_desc_count,
694                 sizeof(struct rq_enet_desc));
695         if (err) {
696                 dev_err(enic, "error in allocation of rq\n");
697                 return err;
698         }
699
700         err = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
701                 socket_id, enic->config.rq_desc_count,
702                 sizeof(struct cq_enet_rq_desc));
703         if (err) {
704                 vnic_rq_free(rq);
705                 dev_err(enic, "error in allocation of cq for rq\n");
706         }
707
708         return err;
709 }
710
711 void enic_free_wq(void *txq)
712 {
713         struct vnic_wq *wq = (struct vnic_wq *)txq;
714         struct enic *enic = vnic_dev_priv(wq->vdev);
715
716         vnic_wq_free(wq);
717         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
718 }
719
720 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
721         unsigned int socket_id, uint16_t nb_desc)
722 {
723         int err;
724         struct vnic_wq *wq = &enic->wq[queue_idx];
725         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
726
727         wq->socket_id = socket_id;
728         if (nb_desc) {
729                 if (nb_desc > enic->config.wq_desc_count) {
730                         dev_warning(enic,
731                                 "WQ %d - number of tx desc in cmd line (%d)"\
732                                 "is greater than that in the UCSM/CIMC adapter"\
733                                 "policy.  Applying the value in the adapter "\
734                                 "policy (%d)\n",
735                                 queue_idx, nb_desc, enic->config.wq_desc_count);
736                 } else if (nb_desc != enic->config.wq_desc_count) {
737                         enic->config.wq_desc_count = nb_desc;
738                         dev_info(enic,
739                                 "TX Queues - effective number of descs:%d\n",
740                                 nb_desc);
741                 }
742         }
743
744         /* Allocate queue resources */
745         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
746                 enic->config.wq_desc_count,
747                 sizeof(struct wq_enet_desc));
748         if (err) {
749                 dev_err(enic, "error in allocation of wq\n");
750                 return err;
751         }
752
753         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
754                 socket_id, enic->config.wq_desc_count,
755                 sizeof(struct cq_enet_wq_desc));
756         if (err) {
757                 vnic_wq_free(wq);
758                 dev_err(enic, "error in allocation of cq for wq\n");
759         }
760
761         return err;
762 }
763
764 int enic_disable(struct enic *enic)
765 {
766         unsigned int i;
767         int err;
768
769         vnic_intr_mask(&enic->intr);
770         (void)vnic_intr_masked(&enic->intr); /* flush write */
771
772         vnic_dev_disable(enic->vdev);
773
774         enic_clsf_destroy(enic);
775
776         if (!enic_is_sriov_vf(enic))
777                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
778
779         for (i = 0; i < enic->wq_count; i++) {
780                 err = vnic_wq_disable(&enic->wq[i]);
781                 if (err)
782                         return err;
783         }
784         for (i = 0; i < enic->rq_count; i++) {
785                 err = vnic_rq_disable(&enic->rq[i]);
786                 if (err)
787                         return err;
788         }
789
790         vnic_dev_set_reset_flag(enic->vdev, 1);
791         vnic_dev_notify_unset(enic->vdev);
792
793         for (i = 0; i < enic->wq_count; i++)
794                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
795         for (i = 0; i < enic->rq_count; i++)
796                 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
797         for (i = 0; i < enic->cq_count; i++)
798                 vnic_cq_clean(&enic->cq[i]);
799         vnic_intr_clean(&enic->intr);
800
801         return 0;
802 }
803
/*
 * Start a device operation and poll for its completion.
 *
 * start(vdev, arg) kicks the operation off; finished(vdev, &done) is
 * then polled up to 2000 times with a 1000 us sleep after each
 * unsuccessful poll (about two seconds in total).
 *
 * Returns 0 once 'finished' reports done, the error from 'start' or
 * 'finished' if either fails, or -ETIMEDOUT when all polls are used up.
 */
static int enic_dev_wait(struct vnic_dev *vdev,
        int (*start)(struct vnic_dev *, int),
        int (*finished)(struct vnic_dev *, int *),
        int arg)
{
        unsigned int polls_left = 2000;
        int done;
        int rc;

        rc = start(vdev, arg);
        if (rc != 0)
                return rc;

        /* Wait for func to complete...2 seconds max */
        while (polls_left-- > 0) {
                rc = finished(vdev, &done);
                if (rc != 0)
                        return rc;

                if (done)
                        return 0;

                usleep(1000);
        }

        return -ETIMEDOUT;
}
828
829 static int enic_dev_open(struct enic *enic)
830 {
831         int err;
832
833         err = enic_dev_wait(enic->vdev, vnic_dev_open,
834                 vnic_dev_open_done, 0);
835         if (err)
836                 dev_err(enic_get_dev(enic),
837                         "vNIC device open failed, err %d\n", err);
838
839         return err;
840 }
841
842 static int enic_set_rsskey(struct enic *enic)
843 {
844         dma_addr_t rss_key_buf_pa;
845         union vnic_rss_key *rss_key_buf_va = NULL;
846         static union vnic_rss_key rss_key = {
847                 .key[0] = {.b = {85, 67, 83, 97, 119, 101, 115, 111, 109, 101}},
848                 .key[1] = {.b = {80, 65, 76, 79, 117, 110, 105, 113, 117, 101}},
849                 .key[2] = {.b = {76, 73, 78, 85, 88, 114, 111, 99, 107, 115}},
850                 .key[3] = {.b = {69, 78, 73, 67, 105, 115, 99, 111, 111, 108}},
851         };
852         int err;
853         u8 name[NAME_MAX];
854
855         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
856         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
857                 &rss_key_buf_pa, name);
858         if (!rss_key_buf_va)
859                 return -ENOMEM;
860
861         rte_memcpy(rss_key_buf_va, &rss_key, sizeof(union vnic_rss_key));
862
863         err = enic_set_rss_key(enic,
864                 rss_key_buf_pa,
865                 sizeof(union vnic_rss_key));
866
867         enic_free_consistent(enic->pdev, sizeof(union vnic_rss_key),
868                 rss_key_buf_va, rss_key_buf_pa);
869
870         return err;
871 }
872
873 static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
874 {
875         dma_addr_t rss_cpu_buf_pa;
876         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
877         int i;
878         int err;
879         u8 name[NAME_MAX];
880
881         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
882         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
883                 &rss_cpu_buf_pa, name);
884         if (!rss_cpu_buf_va)
885                 return -ENOMEM;
886
887         for (i = 0; i < (1 << rss_hash_bits); i++)
888                 (*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count;
889
890         err = enic_set_rss_cpu(enic,
891                 rss_cpu_buf_pa,
892                 sizeof(union vnic_rss_cpu));
893
894         enic_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu),
895                 rss_cpu_buf_va, rss_cpu_buf_pa);
896
897         return err;
898 }
899
900 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
901         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
902 {
903         const u8 tso_ipid_split_en = 0;
904         int err;
905
906         /* Enable VLAN tag stripping */
907
908         err = enic_set_nic_cfg(enic,
909                 rss_default_cpu, rss_hash_type,
910                 rss_hash_bits, rss_base_cpu,
911                 rss_enable, tso_ipid_split_en,
912                 enic->ig_vlan_strip_en);
913
914         return err;
915 }
916
917 int enic_set_rss_nic_cfg(struct enic *enic)
918 {
919         const u8 rss_default_cpu = 0;
920         const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
921             NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
922             NIC_CFG_RSS_HASH_TYPE_IPV6 |
923             NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
924         const u8 rss_hash_bits = 7;
925         const u8 rss_base_cpu = 0;
926         u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
927
928         if (rss_enable) {
929                 if (!enic_set_rsskey(enic)) {
930                         if (enic_set_rsscpu(enic, rss_hash_bits)) {
931                                 rss_enable = 0;
932                                 dev_warning(enic, "RSS disabled, "\
933                                         "Failed to set RSS cpu indirection table.");
934                         }
935                 } else {
936                         rss_enable = 0;
937                         dev_warning(enic,
938                                 "RSS disabled, Failed to set RSS key.\n");
939                 }
940         }
941
942         return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type,
943                 rss_hash_bits, rss_base_cpu, rss_enable);
944 }
945
946 int enic_setup_finish(struct enic *enic)
947 {
948         int ret;
949
950         ret = enic_set_rss_nic_cfg(enic);
951         if (ret) {
952                 dev_err(enic, "Failed to config nic, aborting.\n");
953                 return -1;
954         }
955
956         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
957
958         /* Default conf */
959         vnic_dev_packet_filter(enic->vdev,
960                 1 /* directed  */,
961                 1 /* multicast */,
962                 1 /* broadcast */,
963                 0 /* promisc   */,
964                 1 /* allmulti  */);
965
966         enic->promisc = 0;
967         enic->allmulti = 1;
968
969         return 0;
970 }
971
972 #ifdef VFIO_PRESENT
973 static void enic_eventfd_init(struct enic *enic)
974 {
975         enic->eventfd = enic->pdev->intr_handle.fd;
976 }
977
978 void *enic_err_intr_handler(void *arg)
979 {
980         struct enic *enic = (struct enic *)arg;
981         unsigned int intr = enic_msix_err_intr(enic);
982         ssize_t size;
983         uint64_t data;
984
985         while (1) {
986                 size = read(enic->eventfd, &data, sizeof(data));
987                 dev_err(enic, "Err intr.\n");
988                 vnic_intr_return_all_credits(&enic->intr);
989
990                 enic_log_q_error(enic);
991         }
992
993         return NULL;
994 }
995 #endif
996
997 void enic_add_packet_filter(struct enic *enic)
998 {
999         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1000         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1001                 enic->promisc, enic->allmulti);
1002 }
1003
1004 int enic_get_link_status(struct enic *enic)
1005 {
1006         return vnic_dev_link_status(enic->vdev);
1007 }
1008
1009
1010 #ifdef VFIO_PRESENT
1011 static int enic_create_err_intr_thread(struct enic *enic)
1012 {
1013         pthread_attr_t intr_attr;
1014
1015         /* create threads for error interrupt handling */
1016         pthread_attr_init(&intr_attr);
1017         pthread_attr_setstacksize(&intr_attr, 0x100000);
1018
1019         /* ERR */
1020         if (pthread_create(&enic->err_intr_thread, &intr_attr,
1021                     enic_err_intr_handler, (void *)enic)) {
1022                 dev_err(enic, "Failed to create err interrupt handler threads\n");
1023                 return -1;
1024         }
1025
1026         pthread_attr_destroy(&intr_attr);
1027
1028         return 0;
1029 }
1030
1031
1032 static int enic_set_intr_mode(struct enic *enic)
1033 {
1034         struct vfio_irq_set *irq_set;
1035         int *fds;
1036         int size;
1037         int ret = -1;
1038         int index;
1039
1040         if (enic->intr_count < 1) {
1041                 dev_err(enic, "Unsupported resource conf.\n");
1042                 return -1;
1043         }
1044         vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSIX);
1045
1046         enic->intr_count = 1;
1047
1048         enic_eventfd_init(enic);
1049         size = sizeof(*irq_set) + (sizeof(int));
1050
1051         irq_set = rte_zmalloc("enic_vfio_irq", size, 0);
1052         irq_set->argsz = size;
1053         irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
1054         irq_set->start = 0;
1055         irq_set->count = 1; /* For error interrupt only */
1056         irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
1057             VFIO_IRQ_SET_ACTION_TRIGGER;
1058         fds = (int *)&irq_set->data;
1059
1060         fds[0] = enic->eventfd;
1061
1062         ret = ioctl(enic->pdev->intr_handle.vfio_dev_fd,
1063                 VFIO_DEVICE_SET_IRQS, irq_set);
1064         rte_free(irq_set);
1065         if (ret) {
1066                 dev_err(enic, "Failed to set eventfds for interrupts\n");
1067                 return -1;
1068         }
1069
1070         enic_create_err_intr_thread(enic);
1071         return 0;
1072 }
1073
1074 static void enic_clear_intr_mode(struct enic *enic)
1075 {
1076         vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
1077 }
1078 #endif
1079
1080 static void enic_dev_deinit(struct enic *enic)
1081 {
1082         struct rte_eth_dev *eth_dev = enic->rte_dev;
1083
1084         if (eth_dev->data->mac_addrs)
1085                 rte_free(eth_dev->data->mac_addrs);
1086
1087 #ifdef VFIO_PRESENT
1088         enic_clear_intr_mode(enic);
1089 #endif
1090 }
1091
1092
1093 int enic_set_vnic_res(struct enic *enic)
1094 {
1095         struct rte_eth_dev *eth_dev = enic->rte_dev;
1096
1097         if ((enic->rq_count < eth_dev->data->nb_rx_queues) ||
1098                 (enic->wq_count < eth_dev->data->nb_tx_queues)) {
1099                 dev_err(dev, "Not enough resources configured, aborting\n");
1100                 return -1;
1101         }
1102
1103         enic->rq_count = eth_dev->data->nb_rx_queues;
1104         enic->wq_count = eth_dev->data->nb_tx_queues;
1105         if (enic->cq_count < (enic->rq_count + enic->wq_count)) {
1106                 dev_err(dev, "Not enough resources configured, aborting\n");
1107                 return -1;
1108         }
1109
1110         enic->cq_count = enic->rq_count + enic->wq_count;
1111         return 0;
1112 }
1113
1114 static int enic_dev_init(struct enic *enic)
1115 {
1116         int err;
1117         struct rte_eth_dev *eth_dev = enic->rte_dev;
1118
1119         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1120
1121         /* Get vNIC configuration
1122         */
1123         err = enic_get_vnic_config(enic);
1124         if (err) {
1125                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1126                 return err;
1127         }
1128
1129         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN, 0);
1130         if (!eth_dev->data->mac_addrs) {
1131                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1132                 return -1;
1133         }
1134         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1135                 &eth_dev->data->mac_addrs[0]);
1136
1137
1138         /* Get available resource counts
1139         */
1140         enic_get_res_counts(enic);
1141
1142 #ifdef VFIO_PRESENT
1143         /* Set interrupt mode based on resource counts and system
1144          * capabilities
1145          */
1146         err = enic_set_intr_mode(enic);
1147         if (err) {
1148                 rte_free(eth_dev->data->mac_addrs);
1149                 enic_clear_intr_mode(enic);
1150                 dev_err(dev, "Failed to set intr mode based on resource "\
1151                         "counts and system capabilities, aborting\n");
1152                 return err;
1153         }
1154 #endif
1155
1156         vnic_dev_set_reset_flag(enic->vdev, 0);
1157
1158         return 0;
1159
1160 }
1161
1162 int enic_probe(struct enic *enic)
1163 {
1164         struct rte_pci_device *pdev = enic->pdev;
1165         int err = -1;
1166
1167         dev_info(enic, " Initializing ENIC PMD version %s\n", DRV_VERSION);
1168
1169         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1170         enic->bar0.len = pdev->mem_resource[0].len;
1171
1172         /* Register vNIC device */
1173         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1174         if (!enic->vdev) {
1175                 dev_err(enic, "vNIC registration failed, aborting\n");
1176                 goto err_out;
1177         }
1178
1179         vnic_register_cbacks(enic->vdev,
1180                 enic_alloc_consistent,
1181                 enic_free_consistent);
1182
1183         /* Issue device open to get device in known state */
1184         err = enic_dev_open(enic);
1185         if (err) {
1186                 dev_err(enic, "vNIC dev open failed, aborting\n");
1187                 goto err_out_unregister;
1188         }
1189
1190         /* Set ingress vlan rewrite mode before vnic initialization */
1191         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1192                 IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN);
1193         if (err) {
1194                 dev_err(enic,
1195                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1196                 goto err_out_dev_close;
1197         }
1198
1199         /* Issue device init to initialize the vnic-to-switch link.
1200          * We'll start with carrier off and wait for link UP
1201          * notification later to turn on carrier.  We don't need
1202          * to wait here for the vnic-to-switch link initialization
1203          * to complete; link UP notification is the indication that
1204          * the process is complete.
1205          */
1206
1207         err = vnic_dev_init(enic->vdev, 0);
1208         if (err) {
1209                 dev_err(enic, "vNIC dev init failed, aborting\n");
1210                 goto err_out_dev_close;
1211         }
1212
1213         err = enic_dev_init(enic);
1214         if (err) {
1215                 dev_err(enic, "Device initialization failed, aborting\n");
1216                 goto err_out_dev_close;
1217         }
1218
1219         return 0;
1220
1221 err_out_dev_close:
1222         vnic_dev_close(enic->vdev);
1223 err_out_unregister:
1224         vnic_dev_unregister(enic->vdev);
1225 err_out:
1226         return err;
1227 }
1228
1229 void enic_remove(struct enic *enic)
1230 {
1231         enic_dev_deinit(enic);
1232         vnic_dev_close(enic->vdev);
1233         vnic_dev_unregister(enic->vdev);
1234 }