enic: fix vfio inclusion
[dpdk.git] / lib / librte_pmd_enic / enic_main.c
1 /*
2  * Copyright 2008-2014 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  *
5  * Copyright (c) 2014, Cisco Systems, Inc.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  */
34 #ident "$Id$"
35
36 #include <stdio.h>
37
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <fcntl.h>
41 #include <libgen.h>
42
43 #include <rte_pci.h>
44 #include <rte_memzone.h>
45 #include <rte_malloc.h>
46 #include <rte_mbuf.h>
47 #include <rte_string_fns.h>
48 #include <rte_ethdev.h>
49
50 #include "enic_compat.h"
51 #include "enic.h"
52 #include "wq_enet_desc.h"
53 #include "rq_enet_desc.h"
54 #include "cq_enet_desc.h"
55 #include "vnic_enet.h"
56 #include "vnic_dev.h"
57 #include "vnic_wq.h"
58 #include "vnic_rq.h"
59 #include "vnic_cq.h"
60 #include "vnic_intr.h"
61 #include "vnic_nic.h"
62
63 static inline int enic_is_sriov_vf(struct enic *enic)
64 {
65         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
66 }
67
/*
 * Return 1 when all six octets of the Ethernet address are zero,
 * 0 otherwise.
 */
static int is_zero_addr(char *addr)
{
        int octet_bits = 0;
        int idx;

        /* OR the octets together; any set bit means "not all zero". */
        for (idx = 0; idx < 6; idx++)
                octet_bits |= addr[idx];

        return octet_bits == 0;
}
72
/*
 * Return non-zero (1) when the least significant bit of the first octet is
 * set, i.e. the address is a group (multicast/broadcast) address.
 */
static int is_mcast_addr(char *addr)
{
        const int first_octet = addr[0];

        return first_octet & 1;
}
77
/*
 * An address is accepted when it is neither a multicast address nor the
 * all-zero address.  Returns 1 if valid, 0 otherwise.
 */
static int is_eth_addr_valid(char *addr)
{
        if (is_mcast_addr(addr))
                return 0;

        return !is_zero_addr(addr);
}
82
/*
 * Take one raw mbuf from the given mempool and run the mbuf sanity-check
 * macro on it.  The pointer from the raw allocator is returned unchanged;
 * callers in this file test it for NULL.
 */
static inline struct rte_mbuf *
enic_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *raw = __rte_mbuf_raw_alloc(mp);

        __rte_mbuf_sanity_check_raw(raw, 0);

        return raw;
}
92
93 static const struct rte_memzone *ring_dma_zone_reserve(
94         struct rte_eth_dev *dev, const char *ring_name,
95         uint16_t queue_id, uint32_t ring_size, int socket_id)
96 {
97         char z_name[RTE_MEMZONE_NAMESIZE];
98         const struct rte_memzone *mz;
99
100         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
101                 dev->driver->pci_drv.name, ring_name,
102                 dev->data->port_id, queue_id);
103
104         mz = rte_memzone_lookup(z_name);
105         if (mz)
106                 return mz;
107
108         return rte_memzone_reserve_aligned(z_name, (uint64_t) ring_size,
109                 socket_id, RTE_MEMZONE_1GB, ENIC_ALIGN);
110 }
111
112 void enic_set_hdr_split_size(struct enic *enic, u16 split_hdr_size)
113 {
114         vnic_set_hdr_split_size(enic->vdev, split_hdr_size);
115 }
116
117 static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
118 {
119         struct rte_mbuf *mbuf = (struct rte_mbuf *)buf->os_buf;
120
121         rte_mempool_put(mbuf->pool, mbuf);
122         buf->os_buf = NULL;
123 }
124
/*
 * Buffer-completion callback handed to vnic_wq_service(); it only releases
 * the buffer.  The completion descriptor and opaque pointer are ignored.
 */
static void enic_wq_free_buf(struct vnic_wq *wq,
        struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque)
{
        /* Nothing from the completion is needed: just release the mbuf. */
        enic_free_wq_buf(wq, buf);
}
130
131 static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
132         u8 type, u16 q_number, u16 completed_index, void *opaque)
133 {
134         struct enic *enic = vnic_dev_priv(vdev);
135
136         vnic_wq_service(&enic->wq[q_number], cq_desc,
137                 completed_index, enic_wq_free_buf,
138                 opaque);
139
140         return 0;
141 }
142
143 static void enic_log_q_error(struct enic *enic)
144 {
145         unsigned int i;
146         u32 error_status;
147
148         for (i = 0; i < enic->wq_count; i++) {
149                 error_status = vnic_wq_error_status(&enic->wq[i]);
150                 if (error_status)
151                         dev_err(enic, "WQ[%d] error_status %d\n", i,
152                                 error_status);
153         }
154
155         for (i = 0; i < enic->rq_count; i++) {
156                 error_status = vnic_rq_error_status(&enic->rq[i]);
157                 if (error_status)
158                         dev_err(enic, "RQ[%d] error_status %d\n", i,
159                                 error_status);
160         }
161 }
162
163 unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq)
164 {
165         unsigned int cq = enic_cq_wq(enic, wq->index);
166
167         /* Return the work done */
168         return vnic_cq_service(&enic->cq[cq],
169                 -1 /*wq_work_to_do*/, enic_wq_service, NULL);
170 }
171
172
173 int enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
174         struct rte_mbuf *tx_pkt, unsigned short len,
175         u_int8_t sop, u_int8_t eop,
176         u_int16_t ol_flags, u_int16_t vlan_tag)
177 {
178         struct wq_enet_desc *desc = vnic_wq_next_desc(wq);
179         u_int16_t mss = 0;
180         u_int16_t header_length = 0;
181         u_int8_t cq_entry = eop;
182         u_int8_t vlan_tag_insert = 0;
183         unsigned char *buf = (unsigned char *)(tx_pkt->buf_addr) +
184             RTE_PKTMBUF_HEADROOM;
185         u_int64_t bus_addr = (dma_addr_t)
186             (tx_pkt->buf_physaddr + RTE_PKTMBUF_HEADROOM);
187
188         if (sop) {
189                 if (ol_flags & PKT_TX_VLAN_PKT)
190                         vlan_tag_insert = 1;
191
192                 if (enic->hw_ip_checksum) {
193                         if (ol_flags & PKT_TX_IP_CKSUM)
194                                 mss |= ENIC_CALC_IP_CKSUM;
195
196                         if (ol_flags & PKT_TX_TCP_UDP_CKSUM)
197                                 mss |= ENIC_CALC_TCP_UDP_CKSUM;
198                 }
199         }
200
201         wq_enet_desc_enc(desc,
202                 bus_addr,
203                 len,
204                 mss,
205                 0 /* header_length */,
206                 0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */,
207                 eop,
208                 cq_entry,
209                 0 /* fcoe_encap */,
210                 vlan_tag_insert,
211                 vlan_tag,
212                 0 /* loopback */);
213
214         vnic_wq_post(wq, (void *)tx_pkt, bus_addr, len,
215                 sop, eop,
216                 1 /*desc_skip_cnt*/,
217                 cq_entry,
218                 0 /*compressed send*/,
219                 0 /*wrid*/);
220
221         return 0;
222 }
223
224 void enic_dev_stats_clear(struct enic *enic)
225 {
226         if (vnic_dev_stats_clear(enic->vdev))
227                 dev_err(enic, "Error in clearing stats\n");
228 }
229
230 void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
231 {
232         struct vnic_stats *stats;
233
234         memset(r_stats, 0, sizeof(*r_stats));
235         if (vnic_dev_stats_dump(enic->vdev, &stats)) {
236                 dev_err(enic, "Error in getting stats\n");
237                 return;
238         }
239
240         r_stats->ipackets = stats->rx.rx_frames_ok;
241         r_stats->opackets = stats->tx.tx_frames_ok;
242
243         r_stats->ibytes = stats->rx.rx_bytes_ok;
244         r_stats->obytes = stats->tx.tx_bytes_ok;
245
246         r_stats->ierrors = stats->rx.rx_errors;
247         r_stats->oerrors = stats->tx.tx_errors;
248
249         r_stats->imcasts = stats->rx.rx_multicast_frames_ok;
250         r_stats->rx_nombuf = stats->rx.rx_no_bufs;
251 }
252
253 void enic_del_mac_address(struct enic *enic)
254 {
255         if (vnic_dev_del_addr(enic->vdev, enic->mac_addr))
256                 dev_err(enic, "del mac addr failed\n");
257 }
258
259 void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
260 {
261         int err;
262
263         if (!is_eth_addr_valid(mac_addr)) {
264                 dev_err(enic, "invalid mac address\n");
265                 return;
266         }
267
268         err = vnic_dev_del_addr(enic->vdev, mac_addr);
269         if (err) {
270                 dev_err(enic, "del mac addr failed\n");
271                 return;
272         }
273
274         ether_addr_copy((struct ether_addr *)mac_addr,
275                 (struct ether_addr *)enic->mac_addr);
276
277         err = vnic_dev_add_addr(enic->vdev, mac_addr);
278         if (err) {
279                 dev_err(enic, "add mac addr failed\n");
280                 return;
281         }
282 }
283
284 static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
285 {
286         struct enic *enic = vnic_dev_priv(rq->vdev);
287
288         if (!buf->os_buf)
289                 return;
290
291         rte_pktmbuf_free((struct rte_mbuf *)buf->os_buf);
292         buf->os_buf = NULL;
293 }
294
295 void enic_init_vnic_resources(struct enic *enic)
296 {
297         unsigned int error_interrupt_enable = 1;
298         unsigned int error_interrupt_offset = 0;
299         int index = 0;
300         unsigned int cq_index = 0;
301
302         for (index = 0; index < enic->rq_count; index++) {
303                 vnic_rq_init(&enic->rq[index],
304                         enic_cq_rq(enic, index),
305                         error_interrupt_enable,
306                         error_interrupt_offset);
307         }
308
309         for (index = 0; index < enic->wq_count; index++) {
310                 vnic_wq_init(&enic->wq[index],
311                         enic_cq_wq(enic, index),
312                         error_interrupt_enable,
313                         error_interrupt_offset);
314         }
315
316         vnic_dev_stats_clear(enic->vdev);
317
318         for (index = 0; index < enic->cq_count; index++) {
319                 vnic_cq_init(&enic->cq[index],
320                         0 /* flow_control_enable */,
321                         1 /* color_enable */,
322                         0 /* cq_head */,
323                         0 /* cq_tail */,
324                         1 /* cq_tail_color */,
325                         0 /* interrupt_enable */,
326                         1 /* cq_entry_enable */,
327                         0 /* cq_message_enable */,
328                         0 /* interrupt offset */,
329                         0 /* cq_message_addr */);
330         }
331
332         vnic_intr_init(&enic->intr,
333                 enic->config.intr_timer_usec,
334                 enic->config.intr_timer_type,
335                 /*mask_on_assertion*/1);
336 }
337
338
339 static int enic_rq_alloc_buf(struct vnic_rq *rq)
340 {
341         struct enic *enic = vnic_dev_priv(rq->vdev);
342         void *buf;
343         dma_addr_t dma_addr;
344         struct rq_enet_desc *desc = vnic_rq_next_desc(rq);
345         u_int8_t type = RQ_ENET_TYPE_ONLY_SOP;
346         u_int16_t len = ENIC_MAX_MTU + VLAN_ETH_HLEN;
347         u16 split_hdr_size = vnic_get_hdr_split_size(enic->vdev);
348         struct rte_mbuf *mbuf = enic_rxmbuf_alloc(rq->mp);
349         struct rte_mbuf *hdr_mbuf = NULL;
350
351         if (!mbuf) {
352                 dev_err(enic, "mbuf alloc in enic_rq_alloc_buf failed\n");
353                 return -1;
354         }
355
356         if (unlikely(split_hdr_size)) {
357                 if (vnic_rq_desc_avail(rq) < 2) {
358                         rte_mempool_put(mbuf->pool, mbuf);
359                         return -1;
360                 }
361                 hdr_mbuf = enic_rxmbuf_alloc(rq->mp);
362                 if (!hdr_mbuf) {
363                         rte_mempool_put(mbuf->pool, mbuf);
364                         dev_err(enic,
365                                 "hdr_mbuf alloc in enic_rq_alloc_buf failed\n");
366                         return -1;
367                 }
368
369                 hdr_mbuf->data_off = RTE_PKTMBUF_HEADROOM;
370                 buf = rte_pktmbuf_mtod(hdr_mbuf, void *);
371
372                 hdr_mbuf->nb_segs = 2;
373                 hdr_mbuf->port = rq->index;
374                 hdr_mbuf->next = mbuf;
375
376                 dma_addr = (dma_addr_t)
377                     (hdr_mbuf->buf_physaddr + hdr_mbuf->data_off);
378
379                 rq_enet_desc_enc(desc, dma_addr, type, split_hdr_size);
380
381                 vnic_rq_post(rq, (void *)hdr_mbuf, 0 /*os_buf_index*/, dma_addr,
382                         (unsigned int)split_hdr_size, 0 /*wrid*/);
383
384                 desc = vnic_rq_next_desc(rq);
385                 type = RQ_ENET_TYPE_NOT_SOP;
386         } else {
387                 mbuf->nb_segs = 1;
388                 mbuf->port = rq->index;
389         }
390
391         mbuf->data_off = RTE_PKTMBUF_HEADROOM;
392         buf = rte_pktmbuf_mtod(mbuf, void *);
393         mbuf->next = NULL;
394
395         dma_addr = (dma_addr_t)
396             (mbuf->buf_physaddr + mbuf->data_off);
397
398         rq_enet_desc_enc(desc, dma_addr, type, mbuf->buf_len);
399
400         vnic_rq_post(rq, (void *)mbuf, 0 /*os_buf_index*/, dma_addr,
401                 (unsigned int)mbuf->buf_len, 0 /*wrid*/);
402
403         return 0;
404 }
405
406 static int enic_rq_indicate_buf(struct vnic_rq *rq,
407         struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
408         int skipped, void *opaque)
409 {
410         struct enic *enic = vnic_dev_priv(rq->vdev);
411         struct rte_mbuf **rx_pkt_bucket = (struct rte_mbuf **)opaque;
412         struct rte_mbuf *rx_pkt = NULL;
413         struct rte_mbuf *hdr_rx_pkt = NULL;
414
415         u8 type, color, eop, sop, ingress_port, vlan_stripped;
416         u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
417         u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
418         u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
419         u8 packet_error;
420         u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
421         u32 rss_hash;
422
423         cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
424                 &type, &color, &q_number, &completed_index,
425                 &ingress_port, &fcoe, &eop, &sop, &rss_type,
426                 &csum_not_calc, &rss_hash, &bytes_written,
427                 &packet_error, &vlan_stripped, &vlan_tci, &checksum,
428                 &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error,
429                 &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp,
430                 &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
431                 &fcs_ok);
432
433         if (packet_error) {
434                 dev_err(enic, "packet error\n");
435                 return;
436         }
437
438         rx_pkt = (struct rte_mbuf *)buf->os_buf;
439         buf->os_buf = NULL;
440
441         if (unlikely(skipped)) {
442                 rx_pkt->data_len = 0;
443                 return 0;
444         }
445
446         if (likely(!vnic_get_hdr_split_size(enic->vdev))) {
447                 /* No header split configured */
448                 *rx_pkt_bucket = rx_pkt;
449                 rx_pkt->pkt_len = bytes_written;
450
451                 if (ipv4) {
452                         rx_pkt->ol_flags |= PKT_RX_IPV4_HDR;
453                         if (!csum_not_calc) {
454                                 if (unlikely(!ipv4_csum_ok))
455                                         rx_pkt->ol_flags |= PKT_RX_IP_CKSUM_BAD;
456
457                                 if ((tcp || udp) && (!tcp_udp_csum_ok))
458                                         rx_pkt->ol_flags |= PKT_RX_L4_CKSUM_BAD;
459                         }
460                 } else if (ipv6)
461                         rx_pkt->ol_flags |= PKT_RX_IPV6_HDR;
462         } else {
463                 /* Header split */
464                 if (sop && !eop) {
465                         /* This piece is header */
466                         *rx_pkt_bucket = rx_pkt;
467                         rx_pkt->pkt_len = bytes_written;
468                 } else {
469                         if (sop && eop) {
470                                 /* The packet is smaller than split_hdr_size */
471                                 *rx_pkt_bucket = rx_pkt;
472                                 rx_pkt->pkt_len = bytes_written;
473                                 if (ipv4) {
474                                         rx_pkt->ol_flags |= PKT_RX_IPV4_HDR;
475                                         if (!csum_not_calc) {
476                                                 if (unlikely(!ipv4_csum_ok))
477                                                         rx_pkt->ol_flags |=
478                                                             PKT_RX_IP_CKSUM_BAD;
479
480                                                 if ((tcp || udp) &&
481                                                     (!tcp_udp_csum_ok))
482                                                         rx_pkt->ol_flags |=
483                                                             PKT_RX_L4_CKSUM_BAD;
484                                         }
485                                 } else if (ipv6)
486                                         rx_pkt->ol_flags |= PKT_RX_IPV6_HDR;
487                         } else {
488                                 /* Payload */
489                                 hdr_rx_pkt = *rx_pkt_bucket;
490                                 hdr_rx_pkt->pkt_len += bytes_written;
491                                 if (ipv4) {
492                                         hdr_rx_pkt->ol_flags |= PKT_RX_IPV4_HDR;
493                                         if (!csum_not_calc) {
494                                                 if (unlikely(!ipv4_csum_ok))
495                                                         hdr_rx_pkt->ol_flags |=
496                                                             PKT_RX_IP_CKSUM_BAD;
497
498                                                 if ((tcp || udp) &&
499                                                     (!tcp_udp_csum_ok))
500                                                         hdr_rx_pkt->ol_flags |=
501                                                             PKT_RX_L4_CKSUM_BAD;
502                                         }
503                                 } else if (ipv6)
504                                         hdr_rx_pkt->ol_flags |= PKT_RX_IPV6_HDR;
505
506                         }
507                 }
508         }
509
510         rx_pkt->data_len = bytes_written;
511
512         if (rss_hash) {
513                 rx_pkt->ol_flags |= PKT_RX_RSS_HASH;
514                 rx_pkt->hash.rss = rss_hash;
515         }
516
517         if (vlan_tci) {
518                 rx_pkt->ol_flags |= PKT_RX_VLAN_PKT;
519                 rx_pkt->vlan_tci = vlan_tci;
520         }
521
522         return eop;
523 }
524
525 static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
526         u8 type, u16 q_number, u16 completed_index, void *opaque)
527 {
528         struct enic *enic = vnic_dev_priv(vdev);
529
530         return vnic_rq_service(&enic->rq[q_number], cq_desc,
531                 completed_index, VNIC_RQ_RETURN_DESC,
532                 enic_rq_indicate_buf, opaque);
533
534 }
535
536 int enic_poll(struct vnic_rq *rq, struct rte_mbuf **rx_pkts,
537         unsigned int budget, unsigned int *work_done)
538 {
539         struct enic *enic = vnic_dev_priv(rq->vdev);
540         unsigned int cq = enic_cq_rq(enic, rq->index);
541         int err = 0;
542
543         *work_done = vnic_cq_service(&enic->cq[cq],
544                 budget, enic_rq_service, (void *)rx_pkts);
545
546         if (*work_done) {
547                 vnic_rq_fill(rq, enic_rq_alloc_buf);
548
549                 /* Need at least one buffer on ring to get going */
550                 if (vnic_rq_desc_used(rq) == 0) {
551                         dev_err(enic, "Unable to alloc receive buffers\n");
552                         err = -1;
553                 }
554         }
555         return err;
556 }
557
558 void *enic_alloc_consistent(void *priv, size_t size,
559         dma_addr_t *dma_handle, u8 *name)
560 {
561         struct enic *enic = (struct enic *)priv;
562         void *vaddr;
563         const struct rte_memzone *rz;
564         *dma_handle = 0;
565
566         rz = rte_memzone_reserve_aligned(name, size, 0, 0, ENIC_ALIGN);
567         if (!rz) {
568                 pr_err("%s : Failed to allocate memory requested for %s",
569                         __func__, name);
570                 return NULL;
571         }
572
573         vaddr = rz->addr;
574         *dma_handle = (dma_addr_t)rz->phys_addr;
575
576         return vaddr;
577 }
578
579 void enic_free_consistent(struct rte_pci_device *hwdev, size_t size,
580         void *vaddr, dma_addr_t dma_handle)
581 {
582         /* Nothing to be done */
583 }
584
585 void enic_intr_handler(__rte_unused struct rte_intr_handle *handle,
586         void *arg)
587 {
588         struct enic *enic = pmd_priv((struct rte_eth_dev *)arg);
589
590         dev_err(enic, "Err intr.\n");
591         vnic_intr_return_all_credits(&enic->intr);
592
593         enic_log_q_error(enic);
594 }
595
596 int enic_enable(struct enic *enic)
597 {
598         int index;
599         void *res;
600         char mz_name[RTE_MEMZONE_NAMESIZE];
601         const struct rte_memzone *rmz;
602         struct rte_eth_dev *eth_dev = enic->rte_dev;
603
604         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
605         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
606         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
607
608         if (enic_clsf_init(enic))
609                 dev_warning(enic, "Init of hash table for clsf failed."\
610                         "Flow director feature will not work\n");
611
612         /* Fill RQ bufs */
613         for (index = 0; index < enic->rq_count; index++) {
614                 vnic_rq_fill(&enic->rq[index], enic_rq_alloc_buf);
615
616                 /* Need at least one buffer on ring to get going
617                 */
618                 if (vnic_rq_desc_used(&enic->rq[index]) == 0) {
619                         dev_err(enic, "Unable to alloc receive buffers\n");
620                         return -1;
621                 }
622         }
623
624         for (index = 0; index < enic->wq_count; index++)
625                 vnic_wq_enable(&enic->wq[index]);
626         for (index = 0; index < enic->rq_count; index++)
627                 vnic_rq_enable(&enic->rq[index]);
628
629         vnic_dev_enable_wait(enic->vdev);
630
631 #ifndef VFIO_PRESENT
632         /* Register and enable error interrupt */
633         rte_intr_callback_register(&(enic->pdev->intr_handle),
634                 enic_intr_handler, (void *)enic->rte_dev);
635
636         rte_intr_enable(&(enic->pdev->intr_handle));
637 #endif
638         vnic_intr_unmask(&enic->intr);
639
640         return 0;
641 }
642
643 int enic_alloc_intr_resources(struct enic *enic)
644 {
645         int err;
646
647         dev_info(enic, "vNIC resources used:  "\
648                 "wq %d rq %d cq %d intr %d\n",
649                 enic->wq_count, enic->rq_count,
650                 enic->cq_count, enic->intr_count);
651
652         err = vnic_intr_alloc(enic->vdev, &enic->intr, 0);
653         if (err)
654                 enic_free_vnic_resources(enic);
655
656         return err;
657 }
658
659 void enic_free_rq(void *rxq)
660 {
661         struct vnic_rq *rq = (struct vnic_rq *)rxq;
662         struct enic *enic = vnic_dev_priv(rq->vdev);
663
664         vnic_rq_free(rq);
665         vnic_cq_free(&enic->cq[rq->index]);
666 }
667
668 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
669 {
670         vnic_wq_enable(&enic->wq[queue_idx]);
671 }
672
673 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
674 {
675         return vnic_wq_disable(&enic->wq[queue_idx]);
676 }
677
678 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
679 {
680         vnic_rq_enable(&enic->rq[queue_idx]);
681 }
682
683 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
684 {
685         return vnic_rq_disable(&enic->rq[queue_idx]);
686 }
687
688 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
689         unsigned int socket_id, struct rte_mempool *mp,
690         uint16_t nb_desc)
691 {
692         int err;
693         struct vnic_rq *rq = &enic->rq[queue_idx];
694
695         rq->socket_id = socket_id;
696         rq->mp = mp;
697
698         if (nb_desc) {
699                 if (nb_desc > enic->config.rq_desc_count) {
700                         dev_warning(enic,
701                                 "RQ %d - number of rx desc in cmd line (%d)"\
702                                 "is greater than that in the UCSM/CIMC adapter"\
703                                 "policy.  Applying the value in the adapter "\
704                                 "policy (%d).\n",
705                                 queue_idx, nb_desc, enic->config.rq_desc_count);
706                 } else if (nb_desc != enic->config.rq_desc_count) {
707                         enic->config.rq_desc_count = nb_desc;
708                         dev_info(enic,
709                                 "RX Queues - effective number of descs:%d\n",
710                                 nb_desc);
711                 }
712         }
713
714         /* Allocate queue resources */
715         err = vnic_rq_alloc(enic->vdev, &enic->rq[queue_idx], queue_idx,
716                 enic->config.rq_desc_count,
717                 sizeof(struct rq_enet_desc));
718         if (err) {
719                 dev_err(enic, "error in allocation of rq\n");
720                 return err;
721         }
722
723         err = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
724                 socket_id, enic->config.rq_desc_count,
725                 sizeof(struct cq_enet_rq_desc));
726         if (err) {
727                 vnic_rq_free(rq);
728                 dev_err(enic, "error in allocation of cq for rq\n");
729         }
730
731         return err;
732 }
733
734 void enic_free_wq(void *txq)
735 {
736         struct vnic_wq *wq = (struct vnic_wq *)txq;
737         struct enic *enic = vnic_dev_priv(wq->vdev);
738
739         vnic_wq_free(wq);
740         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
741 }
742
743 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
744         unsigned int socket_id, uint16_t nb_desc)
745 {
746         int err;
747         struct vnic_wq *wq = &enic->wq[queue_idx];
748         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
749
750         wq->socket_id = socket_id;
751         if (nb_desc) {
752                 if (nb_desc > enic->config.wq_desc_count) {
753                         dev_warning(enic,
754                                 "WQ %d - number of tx desc in cmd line (%d)"\
755                                 "is greater than that in the UCSM/CIMC adapter"\
756                                 "policy.  Applying the value in the adapter "\
757                                 "policy (%d)\n",
758                                 queue_idx, nb_desc, enic->config.wq_desc_count);
759                 } else if (nb_desc != enic->config.wq_desc_count) {
760                         enic->config.wq_desc_count = nb_desc;
761                         dev_info(enic,
762                                 "TX Queues - effective number of descs:%d\n",
763                                 nb_desc);
764                 }
765         }
766
767         /* Allocate queue resources */
768         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
769                 enic->config.wq_desc_count,
770                 sizeof(struct wq_enet_desc));
771         if (err) {
772                 dev_err(enic, "error in allocation of wq\n");
773                 return err;
774         }
775
776         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
777                 socket_id, enic->config.wq_desc_count,
778                 sizeof(struct cq_enet_wq_desc));
779         if (err) {
780                 vnic_wq_free(wq);
781                 dev_err(enic, "error in allocation of cq for wq\n");
782         }
783
784         return err;
785 }
786
787 int enic_disable(struct enic *enic)
788 {
789         unsigned int i;
790         int err;
791
792         vnic_intr_mask(&enic->intr);
793         (void)vnic_intr_masked(&enic->intr); /* flush write */
794
795         vnic_dev_disable(enic->vdev);
796
797         enic_clsf_destroy(enic);
798
799         if (!enic_is_sriov_vf(enic))
800                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
801
802         for (i = 0; i < enic->wq_count; i++) {
803                 err = vnic_wq_disable(&enic->wq[i]);
804                 if (err)
805                         return err;
806         }
807         for (i = 0; i < enic->rq_count; i++) {
808                 err = vnic_rq_disable(&enic->rq[i]);
809                 if (err)
810                         return err;
811         }
812
813         vnic_dev_set_reset_flag(enic->vdev, 1);
814         vnic_dev_notify_unset(enic->vdev);
815
816         for (i = 0; i < enic->wq_count; i++)
817                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
818         for (i = 0; i < enic->rq_count; i++)
819                 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
820         for (i = 0; i < enic->cq_count; i++)
821                 vnic_cq_clean(&enic->cq[i]);
822         vnic_intr_clean(&enic->intr);
823
824         return 0;
825 }
826
/*
 * Kick off a device operation and poll until it completes.
 *
 * start(vdev, arg) launches the operation; finished(vdev, &done) is then
 * polled up to 2000 times with a 1000 microsecond sleep after each
 * unsuccessful poll (about two seconds in total).
 *
 * Returns 0 once finished() reports completion, the error from start() or
 * finished() if either fails, or -ETIMEDOUT when the polls are exhausted.
 */
static int enic_dev_wait(struct vnic_dev *vdev,
        int (*start)(struct vnic_dev *, int),
        int (*finished)(struct vnic_dev *, int *),
        int arg)
{
        unsigned int polls_left = 2000;
        int complete;
        int rc;

        rc = start(vdev, arg);
        if (rc)
                return rc;

        /* Wait for func to complete...2 seconds max */
        while (polls_left--) {
                rc = finished(vdev, &complete);
                if (rc)
                        return rc;
                if (complete)
                        return 0;
                usleep(1000);
        }

        return -ETIMEDOUT;
}
851
852 static int enic_dev_open(struct enic *enic)
853 {
854         int err;
855
856         err = enic_dev_wait(enic->vdev, vnic_dev_open,
857                 vnic_dev_open_done, 0);
858         if (err)
859                 dev_err(enic_get_dev(enic),
860                         "vNIC device open failed, err %d\n", err);
861
862         return err;
863 }
864
865 static int enic_set_rsskey(struct enic *enic)
866 {
867         dma_addr_t rss_key_buf_pa;
868         union vnic_rss_key *rss_key_buf_va = NULL;
869         union vnic_rss_key rss_key = {
870                 .key[0].b = {85, 67, 83, 97, 119, 101, 115, 111, 109, 101},
871                 .key[1].b = {80, 65, 76, 79, 117, 110, 105, 113, 117, 101},
872                 .key[2].b = {76, 73, 78, 85, 88, 114, 111, 99, 107, 115},
873                 .key[3].b = {69, 78, 73, 67, 105, 115, 99, 111, 111, 108},
874         };
875         int err;
876         char name[NAME_MAX];
877
878         snprintf(name, NAME_MAX, "rss_key-%s", enic->bdf_name);
879         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
880                 &rss_key_buf_pa, name);
881         if (!rss_key_buf_va)
882                 return -ENOMEM;
883
884         rte_memcpy(rss_key_buf_va, &rss_key, sizeof(union vnic_rss_key));
885
886         err = enic_set_rss_key(enic,
887                 rss_key_buf_pa,
888                 sizeof(union vnic_rss_key));
889
890         enic_free_consistent(enic->pdev, sizeof(union vnic_rss_key),
891                 rss_key_buf_va, rss_key_buf_pa);
892
893         return err;
894 }
895
896 static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
897 {
898         dma_addr_t rss_cpu_buf_pa;
899         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
900         unsigned int i;
901         int err;
902         char name[NAME_MAX];
903
904         snprintf(name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
905         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
906                 &rss_cpu_buf_pa, name);
907         if (!rss_cpu_buf_va)
908                 return -ENOMEM;
909
910         for (i = 0; i < (1 << rss_hash_bits); i++)
911                 (*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count;
912
913         err = enic_set_rss_cpu(enic,
914                 rss_cpu_buf_pa,
915                 sizeof(union vnic_rss_cpu));
916
917         enic_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu),
918                 rss_cpu_buf_va, rss_cpu_buf_pa);
919
920         return err;
921 }
922
923 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
924         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
925 {
926         const u8 tso_ipid_split_en = 0;
927         int err;
928
929         /* Enable VLAN tag stripping */
930
931         err = enic_set_nic_cfg(enic,
932                 rss_default_cpu, rss_hash_type,
933                 rss_hash_bits, rss_base_cpu,
934                 rss_enable, tso_ipid_split_en,
935                 enic->ig_vlan_strip_en);
936
937         return err;
938 }
939
940 int enic_set_rss_nic_cfg(struct enic *enic)
941 {
942         const u8 rss_default_cpu = 0;
943         const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
944             NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
945             NIC_CFG_RSS_HASH_TYPE_IPV6 |
946             NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
947         const u8 rss_hash_bits = 7;
948         const u8 rss_base_cpu = 0;
949         u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
950
951         if (rss_enable) {
952                 if (!enic_set_rsskey(enic)) {
953                         if (enic_set_rsscpu(enic, rss_hash_bits)) {
954                                 rss_enable = 0;
955                                 dev_warning(enic, "RSS disabled, "\
956                                         "Failed to set RSS cpu indirection table.");
957                         }
958                 } else {
959                         rss_enable = 0;
960                         dev_warning(enic,
961                                 "RSS disabled, Failed to set RSS key.\n");
962                 }
963         }
964
965         return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type,
966                 rss_hash_bits, rss_base_cpu, rss_enable);
967 }
968
969 int enic_setup_finish(struct enic *enic)
970 {
971         int ret;
972
973         ret = enic_set_rss_nic_cfg(enic);
974         if (ret) {
975                 dev_err(enic, "Failed to config nic, aborting.\n");
976                 return -1;
977         }
978
979         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
980
981         /* Default conf */
982         vnic_dev_packet_filter(enic->vdev,
983                 1 /* directed  */,
984                 1 /* multicast */,
985                 1 /* broadcast */,
986                 0 /* promisc   */,
987                 1 /* allmulti  */);
988
989         enic->promisc = 0;
990         enic->allmulti = 1;
991
992         return 0;
993 }
994
995 #ifdef VFIO_PRESENT
996 static void enic_eventfd_init(struct enic *enic)
997 {
998         enic->eventfd = enic->pdev->intr_handle.fd;
999 }
1000
1001 void *enic_err_intr_handler(void *arg)
1002 {
1003         struct enic *enic = (struct enic *)arg;
1004         unsigned int intr = enic_msix_err_intr(enic);
1005         ssize_t size;
1006         uint64_t data;
1007
1008         while (1) {
1009                 size = read(enic->eventfd, &data, sizeof(data));
1010                 dev_err(enic, "Err intr.\n");
1011                 vnic_intr_return_all_credits(&enic->intr);
1012
1013                 enic_log_q_error(enic);
1014         }
1015
1016         return NULL;
1017 }
1018 #endif
1019
1020 void enic_add_packet_filter(struct enic *enic)
1021 {
1022         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1023         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1024                 enic->promisc, enic->allmulti);
1025 }
1026
1027 int enic_get_link_status(struct enic *enic)
1028 {
1029         return vnic_dev_link_status(enic->vdev);
1030 }
1031
1032
1033 #ifdef VFIO_PRESENT
1034 static int enic_create_err_intr_thread(struct enic *enic)
1035 {
1036         pthread_attr_t intr_attr;
1037
1038         /* create threads for error interrupt handling */
1039         pthread_attr_init(&intr_attr);
1040         pthread_attr_setstacksize(&intr_attr, 0x100000);
1041
1042         /* ERR */
1043         if (pthread_create(&enic->err_intr_thread, &intr_attr,
1044                     enic_err_intr_handler, (void *)enic)) {
1045                 dev_err(enic, "Failed to create err interrupt handler threads\n");
1046                 return -1;
1047         }
1048
1049         pthread_attr_destroy(&intr_attr);
1050
1051         return 0;
1052 }
1053
1054
1055 static int enic_set_intr_mode(struct enic *enic)
1056 {
1057         struct vfio_irq_set *irq_set;
1058         int *fds;
1059         int size;
1060         int ret = -1;
1061         int index;
1062
1063         if (enic->intr_count < 1) {
1064                 dev_err(enic, "Unsupported resource conf.\n");
1065                 return -1;
1066         }
1067         vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSIX);
1068
1069         enic->intr_count = 1;
1070
1071         enic_eventfd_init(enic);
1072         size = sizeof(*irq_set) + (sizeof(int));
1073
1074         irq_set = rte_zmalloc("enic_vfio_irq", size, 0);
1075         irq_set->argsz = size;
1076         irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
1077         irq_set->start = 0;
1078         irq_set->count = 1; /* For error interrupt only */
1079         irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
1080             VFIO_IRQ_SET_ACTION_TRIGGER;
1081         fds = (int *)&irq_set->data;
1082
1083         fds[0] = enic->eventfd;
1084
1085         ret = ioctl(enic->pdev->intr_handle.vfio_dev_fd,
1086                 VFIO_DEVICE_SET_IRQS, irq_set);
1087         rte_free(irq_set);
1088         if (ret) {
1089                 dev_err(enic, "Failed to set eventfds for interrupts\n");
1090                 return -1;
1091         }
1092
1093         enic_create_err_intr_thread(enic);
1094         return 0;
1095 }
1096
1097 static void enic_clear_intr_mode(struct enic *enic)
1098 {
1099         vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
1100 }
1101 #endif
1102
1103 static void enic_dev_deinit(struct enic *enic)
1104 {
1105         unsigned int i;
1106         struct rte_eth_dev *eth_dev = enic->rte_dev;
1107
1108         if (eth_dev->data->mac_addrs)
1109                 rte_free(eth_dev->data->mac_addrs);
1110
1111 #ifdef VFIO_PRESENT
1112         enic_clear_intr_mode(enic);
1113 #endif
1114 }
1115
1116
1117 int enic_set_vnic_res(struct enic *enic)
1118 {
1119         struct rte_eth_dev *eth_dev = enic->rte_dev;
1120
1121         if ((enic->rq_count < eth_dev->data->nb_rx_queues) ||
1122                 (enic->wq_count < eth_dev->data->nb_tx_queues)) {
1123                 dev_err(dev, "Not enough resources configured, aborting\n");
1124                 return -1;
1125         }
1126
1127         enic->rq_count = eth_dev->data->nb_rx_queues;
1128         enic->wq_count = eth_dev->data->nb_tx_queues;
1129         if (enic->cq_count < (enic->rq_count + enic->wq_count)) {
1130                 dev_err(dev, "Not enough resources configured, aborting\n");
1131                 return -1;
1132         }
1133
1134         enic->cq_count = enic->rq_count + enic->wq_count;
1135         return 0;
1136 }
1137
1138 static int enic_dev_init(struct enic *enic)
1139 {
1140         unsigned int i;
1141         int err;
1142         struct rte_eth_dev *eth_dev = enic->rte_dev;
1143
1144         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1145
1146         /* Get vNIC configuration
1147         */
1148         err = enic_get_vnic_config(enic);
1149         if (err) {
1150                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1151                 return err;
1152         }
1153
1154         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN, 0);
1155         if (!eth_dev->data->mac_addrs) {
1156                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1157                 return -1;
1158         }
1159         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1160                 &eth_dev->data->mac_addrs[0]);
1161
1162
1163         /* Get available resource counts
1164         */
1165         enic_get_res_counts(enic);
1166
1167 #ifdef VFIO_PRESENT
1168         /* Set interrupt mode based on resource counts and system
1169          * capabilities
1170          */
1171         err = enic_set_intr_mode(enic);
1172         if (err) {
1173                 rte_free(eth_dev->data->mac_addrs);
1174                 enic_clear_intr_mode(enic);
1175                 dev_err(dev, "Failed to set intr mode based on resource "\
1176                         "counts and system capabilities, aborting\n");
1177                 return err;
1178         }
1179 #endif
1180
1181         vnic_dev_set_reset_flag(enic->vdev, 0);
1182
1183         return 0;
1184
1185 }
1186
1187 int enic_probe(struct enic *enic)
1188 {
1189         const char *bdf = enic->bdf_name;
1190         struct rte_pci_device *pdev = enic->pdev;
1191         struct rte_eth_dev *eth_dev = enic->rte_dev;
1192         unsigned int i;
1193         int err = -1;
1194
1195         dev_info(enic, " Initializing ENIC PMD version %s\n", DRV_VERSION);
1196
1197         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1198         enic->bar0.len = pdev->mem_resource[0].len;
1199
1200         /* Register vNIC device */
1201         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1202         if (!enic->vdev) {
1203                 dev_err(enic, "vNIC registration failed, aborting\n");
1204                 goto err_out;
1205         }
1206
1207         vnic_register_cbacks(enic->vdev,
1208                 enic_alloc_consistent,
1209                 enic_free_consistent);
1210
1211         /* Issue device open to get device in known state */
1212         err = enic_dev_open(enic);
1213         if (err) {
1214                 dev_err(enic, "vNIC dev open failed, aborting\n");
1215                 goto err_out_unregister;
1216         }
1217
1218         /* Set ingress vlan rewrite mode before vnic initialization */
1219         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1220                 IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN);
1221         if (err) {
1222                 dev_err(enic,
1223                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1224                 goto err_out_dev_close;
1225         }
1226
1227         /* Issue device init to initialize the vnic-to-switch link.
1228          * We'll start with carrier off and wait for link UP
1229          * notification later to turn on carrier.  We don't need
1230          * to wait here for the vnic-to-switch link initialization
1231          * to complete; link UP notification is the indication that
1232          * the process is complete.
1233          */
1234
1235         err = vnic_dev_init(enic->vdev, 0);
1236         if (err) {
1237                 dev_err(enic, "vNIC dev init failed, aborting\n");
1238                 goto err_out_dev_close;
1239         }
1240
1241         err = enic_dev_init(enic);
1242         if (err) {
1243                 dev_err(enic, "Device initialization failed, aborting\n");
1244                 goto err_out_dev_close;
1245         }
1246
1247         return 0;
1248
1249 err_out_dev_close:
1250         vnic_dev_close(enic->vdev);
1251 err_out_unregister:
1252         vnic_dev_unregister(enic->vdev);
1253 err_out:
1254         return err;
1255 }
1256
1257 void enic_remove(struct enic *enic)
1258 {
1259         enic_dev_deinit(enic);
1260         vnic_dev_close(enic->vdev);
1261         vnic_dev_unregister(enic->vdev);
1262 }