dpdk.git: drivers/net/avp/avp_ethdev.c
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38
39 #include <rte_ethdev.h>
40 #include <rte_memcpy.h>
41 #include <rte_string_fns.h>
42 #include <rte_memzone.h>
43 #include <rte_malloc.h>
44 #include <rte_atomic.h>
45 #include <rte_branch_prediction.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_common.h>
49 #include <rte_cycles.h>
50 #include <rte_spinlock.h>
51 #include <rte_byteorder.h>
52 #include <rte_dev.h>
53 #include <rte_memory.h>
54 #include <rte_eal.h>
55 #include <rte_io.h>
56
57 #include "rte_avp_common.h"
58 #include "rte_avp_fifo.h"
59
60 #include "avp_logs.h"
61
62
63 static int avp_dev_create(struct rte_pci_device *pci_dev,
64                           struct rte_eth_dev *eth_dev);
65
66 static int avp_dev_configure(struct rte_eth_dev *dev);
67 static int avp_dev_start(struct rte_eth_dev *dev);
68 static void avp_dev_stop(struct rte_eth_dev *dev);
69 static void avp_dev_close(struct rte_eth_dev *dev);
70 static void avp_dev_info_get(struct rte_eth_dev *dev,
71                              struct rte_eth_dev_info *dev_info);
72 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
73 static int avp_dev_link_update(struct rte_eth_dev *dev,
74                                __rte_unused int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79                                   uint16_t rx_queue_id,
80                                   uint16_t nb_rx_desc,
81                                   unsigned int socket_id,
82                                   const struct rte_eth_rxconf *rx_conf,
83                                   struct rte_mempool *pool);
84
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86                                   uint16_t tx_queue_id,
87                                   uint16_t nb_tx_desc,
88                                   unsigned int socket_id,
89                                   const struct rte_eth_txconf *tx_conf);
90
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92                                         struct rte_mbuf **rx_pkts,
93                                         uint16_t nb_pkts);
94
95 static uint16_t avp_recv_pkts(void *rx_queue,
96                               struct rte_mbuf **rx_pkts,
97                               uint16_t nb_pkts);
98
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100                                         struct rte_mbuf **tx_pkts,
101                                         uint16_t nb_pkts);
102
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104                               struct rte_mbuf **tx_pkts,
105                               uint16_t nb_pkts);
106
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109
110 static void avp_dev_stats_get(struct rte_eth_dev *dev,
111                               struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113
114
115 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
116
117
118 #define AVP_MAX_RX_BURST 64
119 #define AVP_MAX_TX_BURST 64
120 #define AVP_MAX_MAC_ADDRS 1
121 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
122
123
124 /*
125  * Defines the number of microseconds to wait before checking the response
126  * queue for completion.
127  */
128 #define AVP_REQUEST_DELAY_USECS (5000)
129
130 /*
131  * Defines the number of times to check the response queue for completion before
132  * declaring a timeout.
133  */
134 #define AVP_MAX_REQUEST_RETRY (100)
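/*
 * Note: taken together, the two values above bound how long
 * avp_dev_process_request() waits for the host: at most
 * AVP_MAX_REQUEST_RETRY * AVP_REQUEST_DELAY_USECS = 100 * 5000us (~500ms)
 * before it gives up and returns -ETIME.
 */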
135
136 /* Defines the current PCI driver version number */
137 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
138
139 /*
140  * The set of PCI devices this driver supports
141  */
142 static const struct rte_pci_id pci_id_avp_map[] = {
143         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
144           .device_id = RTE_AVP_PCI_DEVICE_ID,
145           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
146           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
147           .class_id = RTE_CLASS_ANY_ID,
148         },
149
150         { .vendor_id = 0, /* sentinel */
151         },
152 };
153
154 /*
155  * dev_ops for avp, bare necessities for basic operation
156  */
157 static const struct eth_dev_ops avp_eth_dev_ops = {
158         .dev_configure       = avp_dev_configure,
159         .dev_start           = avp_dev_start,
160         .dev_stop            = avp_dev_stop,
161         .dev_close           = avp_dev_close,
162         .dev_infos_get       = avp_dev_info_get,
163         .vlan_offload_set    = avp_vlan_offload_set,
164         .stats_get           = avp_dev_stats_get,
165         .stats_reset         = avp_dev_stats_reset,
166         .link_update         = avp_dev_link_update,
167         .promiscuous_enable  = avp_dev_promiscuous_enable,
168         .promiscuous_disable = avp_dev_promiscuous_disable,
169         .rx_queue_setup      = avp_dev_rx_queue_setup,
170         .rx_queue_release    = avp_dev_rx_queue_release,
171         .tx_queue_setup      = avp_dev_tx_queue_setup,
172         .tx_queue_release    = avp_dev_tx_queue_release,
173 };
174
175 /**@{ AVP device flags */
176 #define AVP_F_PROMISC (1 << 1)
177 #define AVP_F_CONFIGURED (1 << 2)
178 #define AVP_F_LINKUP (1 << 3)
179 #define AVP_F_DETACHED (1 << 4)
180 /**@} */
181
182 /* Ethernet device validation marker */
183 #define AVP_ETHDEV_MAGIC 0x92972862
184
185 /*
186  * Defines the AVP device attributes which are attached to an RTE ethernet
187  * device
188  */
189 struct avp_dev {
190         uint32_t magic; /**< Memory validation marker */
191         uint64_t device_id; /**< Unique system identifier */
192         struct ether_addr ethaddr; /**< Host specified MAC address */
193         struct rte_eth_dev_data *dev_data;
194         /**< Back pointer to ethernet device data */
195         volatile uint32_t flags; /**< Device operational flags */
196         uint8_t port_id; /**< Ethernet port identifier */
197         struct rte_mempool *pool; /**< pkt mbuf mempool */
198         unsigned int guest_mbuf_size; /**< local pool mbuf size */
199         unsigned int host_mbuf_size; /**< host mbuf size */
200         unsigned int max_rx_pkt_len; /**< maximum receive unit */
201         uint32_t host_features; /**< Supported feature bitmap */
202         uint32_t features; /**< Enabled feature bitmap */
203         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
204         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
205         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
206         unsigned int max_rx_queues; /**< Maximum number of receive queues */
207
208         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
209         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
210         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
211         /**< Allocated mbufs queue */
212         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
213         /**< To be freed mbufs queue */
214
215         /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
216         rte_spinlock_t lock;
217
218         /* For request & response */
219         struct rte_avp_fifo *req_q; /**< Request queue */
220         struct rte_avp_fifo *resp_q; /**< Response queue */
221         void *host_sync_addr; /**< (host) Req/Resp Mem address */
222         void *sync_addr; /**< Req/Resp Mem address */
223         void *host_mbuf_addr; /**< (host) MBUF pool start address */
224         void *mbuf_addr; /**< MBUF pool start address */
225 } __rte_cache_aligned;
226
227 /* RTE ethernet private data */
228 struct avp_adapter {
229         struct avp_dev avp;
230 } __rte_cache_aligned;
231
232
233 /* 32-bit MMIO register write */
234 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
235
236 /* 32-bit MMIO register read */
237 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
238
239 /* Macro to cast the ethernet device private data to an AVP object */
240 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
241         (&((struct avp_adapter *)adapter)->avp)
242
243 /*
244  * Defines the structure of an AVP device queue for the purpose of handling the
245  * receive and transmit burst callback functions
246  */
247 struct avp_queue {
248         struct rte_eth_dev_data *dev_data;
249         /**< Backpointer to ethernet device data */
250         struct avp_dev *avp; /**< Backpointer to AVP device */
251         uint16_t queue_id;
252         /**< Queue identifier used for indexing current queue */
253         uint16_t queue_base;
254         /**< Base queue identifier for queue servicing */
255         uint16_t queue_limit;
256         /**< Maximum queue identifier for queue servicing */
257
258         uint64_t packets;
259         uint64_t bytes;
260         uint64_t errors;
261 };
262
263 /* send a request and wait for a response
264  *
265  * @warning must be called while holding the avp->lock spinlock.
266  */
267 static int
268 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
269 {
270         unsigned int retry = AVP_MAX_REQUEST_RETRY;
271         void *resp_addr = NULL;
272         unsigned int count;
273         int ret;
274
275         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
276
277         request->result = -ENOTSUP;
278
279         /* Discard any stale responses before starting a new request */
280         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
281                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
282
283         rte_memcpy(avp->sync_addr, request, sizeof(*request));
284         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
285         if (count < 1) {
286                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
287                             request->req_id);
288                 ret = -EBUSY;
289                 goto done;
290         }
291
292         while (retry--) {
293                 /* wait for a response */
294                 usleep(AVP_REQUEST_DELAY_USECS);
295
296                 count = avp_fifo_count(avp->resp_q);
297                 if (count >= 1) {
298                         /* response received */
299                         break;
300                 }
301
302                 if ((count < 1) && (retry == 0)) {
303                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
304                                     request->req_id);
305                         ret = -ETIME;
306                         goto done;
307                 }
308         }
309
310         /* retrieve the response */
311         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
312         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
313                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
314                             count, resp_addr, avp->host_sync_addr);
315                 ret = -ENODATA;
316                 goto done;
317         }
318
319         /* copy to user buffer */
320         rte_memcpy(request, avp->sync_addr, sizeof(*request));
321         ret = 0;
322
323         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
324                     request->result, request->req_id);
325
326 done:
327         return ret;
328 }
329
330 static int
331 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
332 {
333         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
334         struct rte_avp_request request;
335         int ret;
336
337         /* setup a link state change request */
338         memset(&request, 0, sizeof(request));
339         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
340         request.if_up = state;
341
342         ret = avp_dev_process_request(avp, &request);
343
344         return ret == 0 ? request.result : ret;
345 }
346
347 static int
348 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
349                         struct rte_avp_device_config *config)
350 {
351         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
352         struct rte_avp_request request;
353         int ret;
354
355         /* setup a configure request */
356         memset(&request, 0, sizeof(request));
357         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
358         memcpy(&request.config, config, sizeof(request.config));
359
360         ret = avp_dev_process_request(avp, &request);
361
362         return ret == 0 ? request.result : ret;
363 }
364
365 static int
366 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
367 {
368         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
369         struct rte_avp_request request;
370         int ret;
371
372         /* setup a shutdown request */
373         memset(&request, 0, sizeof(request));
374         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
375
376         ret = avp_dev_process_request(avp, &request);
377
378         return ret == 0 ? request.result : ret;
379 }
380
381 /* translate from host mbuf virtual address to guest virtual address */
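/*
 * Illustrative example (hypothetical addresses): if the host mbuf pool
 * starts at host_mbuf_addr = 0x7f0000000000 and is mapped locally at
 * mbuf_addr = 0x400000000000, then a host pointer 0x7f0000001000 translates
 * to 0x400000001000, i.e. the same offset into the shared mbuf pool.
 */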
382 static inline void *
383 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
384 {
385         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
386                                        (uintptr_t)avp->host_mbuf_addr),
387                            (uintptr_t)avp->mbuf_addr);
388 }
389
390 /* translate from host physical address to guest virtual address */
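/*
 * The guest mapping of RTE_AVP_PCI_MEMORY_BAR concatenates the host memory
 * segments described in the memmap BAR, so the translated address is the
 * BAR base plus the lengths of all preceding segments plus the offset into
 * the matching segment.
 */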
391 static void *
392 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
393                           phys_addr_t host_phys_addr)
394 {
395         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
396         struct rte_mem_resource *resource;
397         struct rte_avp_memmap_info *info;
398         struct rte_avp_memmap *map;
399         off_t offset;
400         void *addr;
401         unsigned int i;
402
403         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
404         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
405         info = (struct rte_avp_memmap_info *)resource->addr;
406
407         offset = 0;
408         for (i = 0; i < info->nb_maps; i++) {
409                 /* search all segments looking for a matching address */
410                 map = &info->maps[i];
411
412                 if ((host_phys_addr >= map->phys_addr) &&
413                         (host_phys_addr < (map->phys_addr + map->length))) {
414                         /* address is within this segment */
415                         offset += (host_phys_addr - map->phys_addr);
416                         addr = RTE_PTR_ADD(addr, offset);
417
418                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
419                                     host_phys_addr, addr);
420
421                         return addr;
422                 }
423                 offset += map->length;
424         }
425
426         return NULL;
427 }
428
429 /* verify that the incoming device version is compatible with our version */
430 static int
431 avp_dev_version_check(uint32_t version)
432 {
433         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
434         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
435
436         if (device <= driver) {
437                 /* the host driver version is less than or equal to ours */
438                 return 0;
439         }
440
441         return 1;
442 }
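/*
 * Note: the comparison above is done with the minor version stripped, so
 * minor version differences are tolerated; the check fails only when the
 * host reports a version newer than this driver's.
 */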
443
444 /* verify that memory regions have expected version and validation markers */
445 static int
446 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
447 {
448         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
449         struct rte_avp_memmap_info *memmap;
450         struct rte_avp_device_info *info;
451         struct rte_mem_resource *resource;
452         unsigned int i;
453
454         /* Dump resource info for debug */
455         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
456                 resource = &pci_dev->mem_resource[i];
457                 if ((resource->phys_addr == 0) || (resource->len == 0))
458                         continue;
459
460                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
461                             i, resource->phys_addr,
462                             resource->len, resource->addr);
463
464                 switch (i) {
465                 case RTE_AVP_PCI_MEMMAP_BAR:
466                         memmap = (struct rte_avp_memmap_info *)resource->addr;
467                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
468                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
469                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
470                                             memmap->magic, memmap->version);
471                                 return -EINVAL;
472                         }
473                         break;
474
475                 case RTE_AVP_PCI_DEVICE_BAR:
476                         info = (struct rte_avp_device_info *)resource->addr;
477                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
478                             avp_dev_version_check(info->version)) {
479                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
480                                             info->magic, info->version,
481                                             AVP_DPDK_DRIVER_VERSION);
482                                 return -EINVAL;
483                         }
484                         break;
485
486                 case RTE_AVP_PCI_MEMORY_BAR:
487                 case RTE_AVP_PCI_MMIO_BAR:
488                         if (resource->addr == NULL) {
489                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
490                                             i);
491                                 return -EINVAL;
492                         }
493                         break;
494
495                 case RTE_AVP_PCI_MSIX_BAR:
496                 default:
497                         /* no validation required */
498                         break;
499                 }
500         }
501
502         return 0;
503 }
504
505 static int
506 avp_dev_detach(struct rte_eth_dev *eth_dev)
507 {
508         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
509         int ret;
510
511         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
512                     eth_dev->data->port_id, avp->device_id);
513
514         rte_spinlock_lock(&avp->lock);
515
516         if (avp->flags & AVP_F_DETACHED) {
517                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
518                             eth_dev->data->port_id);
519                 ret = 0;
520                 goto unlock;
521         }
522
523         /* shutdown the device first so the host stops sending us packets. */
524         ret = avp_dev_ctrl_shutdown(eth_dev);
525         if (ret < 0) {
526                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
527                             ret);
528                 avp->flags &= ~AVP_F_DETACHED;
529                 goto unlock;
530         }
531
532         avp->flags |= AVP_F_DETACHED;
533         rte_wmb();
534
535         /* wait for queues to acknowledge the presence of the detach flag */
536         rte_delay_ms(1);
537
538         ret = 0;
539
540 unlock:
541         rte_spinlock_unlock(&avp->lock);
542         return ret;
543 }
544
545 static void
546 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
547 {
548         struct avp_dev *avp =
549                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
550         struct avp_queue *rxq;
551         uint16_t queue_count;
552         uint16_t remainder;
553
554         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
555
556         /*
557          * Must map all AVP fifos as evenly as possible between the configured
558          * device queues.  Each device queue will service a subset of the AVP
559           * fifos.  If the fifos do not divide evenly among the device queues,
560           * the first queues each service one extra fifo (see the example below).
561          */
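        /*
         * Worked example (hypothetical values): with 9 AVP fifos and 4
         * configured device queues, queue_count = 2 and remainder = 1, so
         * rxq 0 services fifos 0-2 while rxqs 1, 2 and 3 service fifos 3-4,
         * 5-6 and 7-8 respectively.
         */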
562         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
563         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
564         if (rx_queue_id < remainder) {
565                 /* these queues must service one extra FIFO */
566                 rxq->queue_base = rx_queue_id * (queue_count + 1);
567                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
568         } else {
569                 /* these queues service the regular number of FIFOs */
570                 rxq->queue_base = ((remainder * (queue_count + 1)) +
571                                    ((rx_queue_id - remainder) * queue_count));
572                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
573         }
574
575         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
576                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
577
578         rxq->queue_id = rxq->queue_base;
579 }
580
581 static void
582 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
583 {
584         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
585         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
586         struct rte_avp_device_info *host_info;
587         void *addr;
588
589         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
590         host_info = (struct rte_avp_device_info *)addr;
591
592         /*
593          * the transmit direction is not negotiated beyond respecting the max
594          * number of queues because the host can handle arbitrary guest tx
595          * queues (host rx queues).
596          */
597         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
598
599         /*
600          * the receive direction is more restrictive.  The host requires a
601          * minimum number of guest rx queues (host tx queues) therefore
602          * negotiate a value that is at least as large as the host minimum
603          * requirement.  If the host and guest values are not identical then a
604          * mapping will be established in the receive_queue_setup function.
605          */
606         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
607                                      eth_dev->data->nb_rx_queues);
608
609         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
610                     avp->num_tx_queues, avp->num_rx_queues);
611 }
612
613 static int
614 avp_dev_attach(struct rte_eth_dev *eth_dev)
615 {
616         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
617         struct rte_avp_device_config config;
618         unsigned int i;
619         int ret;
620
621         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
622                     eth_dev->data->port_id, avp->device_id);
623
624         rte_spinlock_lock(&avp->lock);
625
626         if (!(avp->flags & AVP_F_DETACHED)) {
627                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
628                             eth_dev->data->port_id);
629                 ret = 0;
630                 goto unlock;
631         }
632
633         /*
634          * make sure that the detached flag is set prior to reconfiguring the
635          * queues.
636          */
637         avp->flags |= AVP_F_DETACHED;
638         rte_wmb();
639
640         /*
641          * re-run the device create utility which will parse the new host info
642          * and setup the AVP device queue pointers.
643          */
644         ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
645         if (ret < 0) {
646                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
647                             ret);
648                 goto unlock;
649         }
650
651         if (avp->flags & AVP_F_CONFIGURED) {
652                 /*
653                  * Update the receive queue mapping to handle cases where the
654                  * source and destination hosts have different queue
655                  * requirements.  As long as the DETACHED flag is asserted the
656                  * queue table should not be referenced so it should be safe to
657                  * update it.
658                  */
659                 _avp_set_queue_counts(eth_dev);
660                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
661                         _avp_set_rx_queue_mappings(eth_dev, i);
662
663                 /*
664                  * Update the host with our config details so that it knows the
665                  * device is active.
666                  */
667                 memset(&config, 0, sizeof(config));
668                 config.device_id = avp->device_id;
669                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
670                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
671                 config.features = avp->features;
672                 config.num_tx_queues = avp->num_tx_queues;
673                 config.num_rx_queues = avp->num_rx_queues;
674                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
675
676                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
677                 if (ret < 0) {
678                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
679                                     ret);
680                         goto unlock;
681                 }
682         }
683
684         rte_wmb();
685         avp->flags &= ~AVP_F_DETACHED;
686
687         ret = 0;
688
689 unlock:
690         rte_spinlock_unlock(&avp->lock);
691         return ret;
692 }
693
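/*
 * UIO interrupt callback.  On a migration interrupt this handler reads the
 * migration status register, detaches or re-attaches the device accordingly,
 * acknowledges the result by writing either the handled status value or
 * RTE_AVP_MIGRATION_ERROR to the ACK register, and finally re-enables the
 * UIO interrupt.
 */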
694 static void
695 avp_dev_interrupt_handler(void *data)
696 {
697         struct rte_eth_dev *eth_dev = data;
698         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
699         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
700         uint32_t status, value;
701         int ret;
702
703         if (registers == NULL)
704                 rte_panic("no mapped MMIO register space\n");
705
706         /* read the interrupt status register
707          * note: this register clears on read so all raised interrupts must be
708          *    handled or remembered for later processing
709          */
710         status = AVP_READ32(
711                 RTE_PTR_ADD(registers,
712                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
713
714         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
715                 /* handle interrupt based on current status */
716                 value = AVP_READ32(
717                         RTE_PTR_ADD(registers,
718                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
719                 switch (value) {
720                 case RTE_AVP_MIGRATION_DETACHED:
721                         ret = avp_dev_detach(eth_dev);
722                         break;
723                 case RTE_AVP_MIGRATION_ATTACHED:
724                         ret = avp_dev_attach(eth_dev);
725                         break;
726                 default:
727                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
728                                     value);
729                         ret = -EINVAL;
730                 }
731
732                 /* acknowledge the request by writing out our current status */
733                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
734                 AVP_WRITE32(value,
735                             RTE_PTR_ADD(registers,
736                                         RTE_AVP_MIGRATION_ACK_OFFSET));
737
738                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
739         }
740
741         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
742                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
743                             status);
744
745         /* re-enable UIO interrupt handling */
746         ret = rte_intr_enable(&pci_dev->intr_handle);
747         if (ret < 0) {
748                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
749                             ret);
750                 /* continue */
751         }
752 }
753
754 static int
755 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
756 {
757         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
758         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
759         int ret;
760
761         if (registers == NULL)
762                 return -EINVAL;
763
764         /* enable UIO interrupt handling */
765         ret = rte_intr_enable(&pci_dev->intr_handle);
766         if (ret < 0) {
767                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
768                             ret);
769                 return ret;
770         }
771
772         /* inform the device that all interrupts are enabled */
773         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
774                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
775
776         return 0;
777 }
778
779 static int
780 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
781 {
782         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
783         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
784         int ret;
785
786         if (registers == NULL)
787                 return 0;
788
789         /* inform the device that all interrupts are disabled */
790         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
791                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
792
793         /* disable UIO interrupt handling */
794         ret = rte_intr_disable(&pci_dev->intr_handle);
795         if (ret < 0) {
796                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
797                             ret);
798                 return ret;
799         }
800
801         return 0;
802 }
803
804 static int
805 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
806 {
807         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
808         int ret;
809
810         /* register a callback handler with UIO for interrupt notifications */
811         ret = rte_intr_callback_register(&pci_dev->intr_handle,
812                                          avp_dev_interrupt_handler,
813                                          (void *)eth_dev);
814         if (ret < 0) {
815                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
816                             ret);
817                 return ret;
818         }
819
820         /* enable interrupt processing */
821         return avp_dev_enable_interrupts(eth_dev);
822 }
823
824 static int
825 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
826 {
827         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
828         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
829         uint32_t value;
830
831         if (registers == NULL)
832                 return 0;
833
834         value = AVP_READ32(RTE_PTR_ADD(registers,
835                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
836         if (value == RTE_AVP_MIGRATION_DETACHED) {
837                 /* migration is in progress; ack it if we have not already */
838                 AVP_WRITE32(value,
839                             RTE_PTR_ADD(registers,
840                                         RTE_AVP_MIGRATION_ACK_OFFSET));
841                 return 1;
842         }
843         return 0;
844 }
845
846 /*
847  * create an AVP device using the supplied device info by first translating it
848  * to guest address space(s).
849  */
850 static int
851 avp_dev_create(struct rte_pci_device *pci_dev,
852                struct rte_eth_dev *eth_dev)
853 {
854         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
855         struct rte_avp_device_info *host_info;
856         struct rte_mem_resource *resource;
857         unsigned int i;
858
859         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
860         if (resource->addr == NULL) {
861                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
862                             RTE_AVP_PCI_DEVICE_BAR);
863                 return -EFAULT;
864         }
865         host_info = (struct rte_avp_device_info *)resource->addr;
866
867         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
868                 avp_dev_version_check(host_info->version)) {
869                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
870                             host_info->magic, host_info->version,
871                             AVP_DPDK_DRIVER_VERSION);
872                 return -EINVAL;
873         }
874
875         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
876                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
877                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
878                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
879
880         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
881                     host_info->min_tx_queues, host_info->max_tx_queues);
882         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
883                     host_info->min_rx_queues, host_info->max_rx_queues);
884         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
885                     host_info->features);
886
887         if (avp->magic != AVP_ETHDEV_MAGIC) {
888                 /*
889                  * First time initialization (i.e., not during a VM
890                  * migration)
891                  */
892                 memset(avp, 0, sizeof(*avp));
893                 avp->magic = AVP_ETHDEV_MAGIC;
894                 avp->dev_data = eth_dev->data;
895                 avp->port_id = eth_dev->data->port_id;
896                 avp->host_mbuf_size = host_info->mbuf_size;
897                 avp->host_features = host_info->features;
898                 rte_spinlock_init(&avp->lock);
899                 memcpy(&avp->ethaddr.addr_bytes[0],
900                        host_info->ethaddr, ETHER_ADDR_LEN);
901                 /* adjust max values to not exceed our max */
902                 avp->max_tx_queues =
903                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
904                 avp->max_rx_queues =
905                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
906         } else {
907                 /* Re-attaching during migration */
908
909                 /* TODO... requires validation of host values */
910                 if ((host_info->features & avp->features) != avp->features) {
911                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
912                                     avp->features, host_info->features);
913                         /* this should not be possible; continue for now */
914                 }
915         }
916
917         /* the device id is allowed to change over migrations */
918         avp->device_id = host_info->device_id;
919
920         /* translate incoming host addresses to guest address space */
921         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
922                     host_info->tx_phys);
923         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
924                     host_info->alloc_phys);
925         for (i = 0; i < avp->max_tx_queues; i++) {
926                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
927                         host_info->tx_phys + (i * host_info->tx_size));
928
929                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
930                         host_info->alloc_phys + (i * host_info->alloc_size));
931         }
932
933         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
934                     host_info->rx_phys);
935         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
936                     host_info->free_phys);
937         for (i = 0; i < avp->max_rx_queues; i++) {
938                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
939                         host_info->rx_phys + (i * host_info->rx_size));
940                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
941                         host_info->free_phys + (i * host_info->free_size));
942         }
943
944         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
945                     host_info->req_phys);
946         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
947                     host_info->resp_phys);
948         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
949                     host_info->sync_phys);
950         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
951                     host_info->mbuf_phys);
952         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
953         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
954         avp->sync_addr =
955                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
956         avp->mbuf_addr =
957                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
958
959         /*
960          * store the host mbuf virtual address so that we can calculate
961          * relative offsets for each mbuf as they are processed
962          */
963         avp->host_mbuf_addr = host_info->mbuf_va;
964         avp->host_sync_addr = host_info->sync_va;
965
966         /*
967          * store the maximum packet length that is supported by the host.
968          */
969         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
970         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
971                                 host_info->max_rx_pkt_len);
972
973         return 0;
974 }
975
976 /*
977  * This function is based on probe() function in avp_pci.c
978  * It returns 0 on success.
979  */
980 static int
981 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
982 {
983         struct avp_dev *avp =
984                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
985         struct rte_pci_device *pci_dev;
986         int ret;
987
988         pci_dev = AVP_DEV_TO_PCI(eth_dev);
989         eth_dev->dev_ops = &avp_eth_dev_ops;
990         eth_dev->rx_pkt_burst = &avp_recv_pkts;
991         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
992
993         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
994                 /*
995                  * no setup required on secondary processes.  All data is saved
996                  * in dev_private by the primary process.  All resources should
997                  * be mapped to the same virtual address so all pointers should
998                  * be valid.
999                  */
1000                 if (eth_dev->data->scattered_rx) {
1001                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1002                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1003                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1004                 }
1005                 return 0;
1006         }
1007
1008         rte_eth_copy_pci_info(eth_dev, pci_dev);
1009
1010         eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1011
1012         /* Check current migration status */
1013         if (avp_dev_migration_pending(eth_dev)) {
1014                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1015                 return -EBUSY;
1016         }
1017
1018         /* Check BAR resources */
1019         ret = avp_dev_check_regions(eth_dev);
1020         if (ret < 0) {
1021                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1022                             ret);
1023                 return ret;
1024         }
1025
1026         /* Enable interrupts */
1027         ret = avp_dev_setup_interrupts(eth_dev);
1028         if (ret < 0) {
1029                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1030                 return ret;
1031         }
1032
1033         /* Handle each subtype */
1034         ret = avp_dev_create(pci_dev, eth_dev);
1035         if (ret < 0) {
1036                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1037                 return ret;
1038         }
1039
1040         /* Allocate memory for storing MAC addresses */
1041         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1042         if (eth_dev->data->mac_addrs == NULL) {
1043                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1044                             ETHER_ADDR_LEN);
1045                 return -ENOMEM;
1046         }
1047
1048         /* Get a mac from device config */
1049         ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1050
1051         return 0;
1052 }
1053
1054 static int
1055 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1056 {
1057         int ret;
1058
1059         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1060                 return -EPERM;
1061
1062         if (eth_dev->data == NULL)
1063                 return 0;
1064
1065         ret = avp_dev_disable_interrupts(eth_dev);
1066         if (ret != 0) {
1067                 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1068                 return ret;
1069         }
1070
1071         if (eth_dev->data->mac_addrs != NULL) {
1072                 rte_free(eth_dev->data->mac_addrs);
1073                 eth_dev->data->mac_addrs = NULL;
1074         }
1075
1076         return 0;
1077 }
1078
1079
1080 static struct eth_driver rte_avp_pmd = {
1081         {
1082                 .id_table = pci_id_avp_map,
1083                 .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1084                 .probe = rte_eth_dev_pci_probe,
1085                 .remove = rte_eth_dev_pci_remove,
1086         },
1087         .eth_dev_init = eth_avp_dev_init,
1088         .eth_dev_uninit = eth_avp_dev_uninit,
1089         .dev_private_size = sizeof(struct avp_adapter),
1090 };
1091
1092 static int
1093 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1094                          struct avp_dev *avp)
1095 {
1096         unsigned int max_rx_pkt_len;
1097
1098         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1099
1100         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1101             (max_rx_pkt_len > avp->host_mbuf_size)) {
1102                 /*
1103                  * If the guest MTU is greater than either the host or guest
1104                  * buffers then chained mbufs have to be enabled in the TX
1105                  * direction.  It is assumed that the application will not need
1106                  * to send packets larger than their max_rx_pkt_len (MRU).
1107                  */
1108                 return 1;
1109         }
1110
1111         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1112             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1113                 /*
1114                  * If the host MRU is greater than its own mbuf size or the
1115                  * guest mbuf size then chained mbufs have to be enabled in the
1116                  * RX direction.
1117                  */
1118                 return 1;
1119         }
1120
1121         return 0;
1122 }
1123
1124 static int
1125 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1126                        uint16_t rx_queue_id,
1127                        uint16_t nb_rx_desc,
1128                        unsigned int socket_id,
1129                        const struct rte_eth_rxconf *rx_conf,
1130                        struct rte_mempool *pool)
1131 {
1132         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1133         struct rte_pktmbuf_pool_private *mbp_priv;
1134         struct avp_queue *rxq;
1135
1136         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1137                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1138                             rx_queue_id, eth_dev->data->nb_rx_queues);
1139                 return -EINVAL;
1140         }
1141
1142         /* Save mbuf pool pointer */
1143         avp->pool = pool;
1144
1145         /* Save the local mbuf size */
1146         mbp_priv = rte_mempool_get_priv(pool);
1147         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1148         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1149
1150         if (avp_dev_enable_scattered(eth_dev, avp)) {
1151                 if (!eth_dev->data->scattered_rx) {
1152                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1153                         eth_dev->data->scattered_rx = 1;
1154                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1155                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1156                 }
1157         }
1158
1159         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1160                     avp->max_rx_pkt_len,
1161                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1162                     avp->host_mbuf_size,
1163                     avp->guest_mbuf_size);
1164
1165         /* allocate a queue object */
1166         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1167                                  RTE_CACHE_LINE_SIZE, socket_id);
1168         if (rxq == NULL) {
1169                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1170                 return -ENOMEM;
1171         }
1172
1173         /* save back pointers to AVP and Ethernet devices */
1174         rxq->avp = avp;
1175         rxq->dev_data = eth_dev->data;
1176         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1177
1178         /* setup the queue receive mapping for the current queue. */
1179         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1180
1181         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1182
1183         (void)nb_rx_desc;
1184         (void)rx_conf;
1185         return 0;
1186 }
1187
1188 static int
1189 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1190                        uint16_t tx_queue_id,
1191                        uint16_t nb_tx_desc,
1192                        unsigned int socket_id,
1193                        const struct rte_eth_txconf *tx_conf)
1194 {
1195         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1196         struct avp_queue *txq;
1197
1198         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1199                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1200                             tx_queue_id, eth_dev->data->nb_tx_queues);
1201                 return -EINVAL;
1202         }
1203
1204         /* allocate a queue object */
1205         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1206                                  RTE_CACHE_LINE_SIZE, socket_id);
1207         if (txq == NULL) {
1208                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1209                 return -ENOMEM;
1210         }
1211
1212         /* only the configured set of transmit queues are used */
1213         txq->queue_id = tx_queue_id;
1214         txq->queue_base = tx_queue_id;
1215         txq->queue_limit = tx_queue_id;
1216
1217         /* save back pointers to AVP and Ethernet devices */
1218         txq->avp = avp;
1219         txq->dev_data = eth_dev->data;
1220         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1221
1222         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1223
1224         (void)nb_tx_desc;
1225         (void)tx_conf;
1226         return 0;
1227 }
1228
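/* compare two ethernet addresses as three 16-bit words; returns zero when
 * they are equal and non-zero otherwise (a branch-free memcmp substitute)
 */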
1229 static inline int
1230 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1231 {
1232         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1233         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1234         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1235 }
1236
1237 static inline int
1238 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1239 {
1240         struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1241
1242         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1243                 /* allow all packets destined to our address */
1244                 return 0;
1245         }
1246
1247         if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1248                 /* allow all broadcast packets */
1249                 return 0;
1250         }
1251
1252         if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1253                 /* allow all multicast packets */
1254                 return 0;
1255         }
1256
1257         if (avp->flags & AVP_F_PROMISC) {
1258                 /* allow all packets when in promiscuous mode */
1259                 return 0;
1260         }
1261
1262         return -1;
1263 }
1264
1265 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1266 static inline void
1267 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1268 {
1269         struct rte_avp_desc *first_buf;
1270         struct rte_avp_desc *pkt_buf;
1271         unsigned int pkt_len;
1272         unsigned int nb_segs;
1273         void *pkt_data;
1274         unsigned int i;
1275
1276         first_buf = avp_dev_translate_buffer(avp, buf);
1277
1278         i = 0;
1279         pkt_len = 0;
1280         nb_segs = first_buf->nb_segs;
1281         do {
1282                 /* Adjust pointers for guest addressing */
1283                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1284                 if (pkt_buf == NULL)
1285                         rte_panic("bad buffer: segment %u has an invalid address %p\n",
1286                                   i, buf);
1287                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1288                 if (pkt_data == NULL)
1289                         rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1290                                   i);
1291                 if (pkt_buf->data_len == 0)
1292                         rte_panic("bad buffer: segment %u has 0 data length\n",
1293                                   i);
1294                 pkt_len += pkt_buf->data_len;
1295                 nb_segs--;
1296                 i++;
1297
1298         } while (nb_segs && (buf = pkt_buf->next) != NULL);
1299
1300         if (nb_segs != 0)
1301                 rte_panic("bad buffer: expected %u segments found %u\n",
1302                           first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1303         if (pkt_len != first_buf->pkt_len)
1304                 rte_panic("bad buffer: expected length %u found %u\n",
1305                           first_buf->pkt_len, pkt_len);
1306 }
1307
1308 #define avp_dev_buffer_sanity_check(a, b) \
1309         __avp_dev_buffer_sanity_check((a), (b))
1310
1311 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1312
1313 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1314
1315 #endif
1316
1317 /*
1318  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1319  * there are exactly the required number of mbufs to copy all source bytes.
1320  */
1321 static inline struct rte_mbuf *
1322 avp_dev_copy_from_buffers(struct avp_dev *avp,
1323                           struct rte_avp_desc *buf,
1324                           struct rte_mbuf **mbufs,
1325                           unsigned int count)
1326 {
1327         struct rte_mbuf *m_previous = NULL;
1328         struct rte_avp_desc *pkt_buf;
1329         unsigned int total_length = 0;
1330         unsigned int copy_length;
1331         unsigned int src_offset;
1332         struct rte_mbuf *m;
1333         uint16_t ol_flags;
1334         uint16_t vlan_tci;
1335         void *pkt_data;
1336         unsigned int i;
1337
1338         avp_dev_buffer_sanity_check(avp, buf);
1339
1340         /* setup the first source buffer */
1341         pkt_buf = avp_dev_translate_buffer(avp, buf);
1342         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1343         total_length = pkt_buf->pkt_len;
1344         src_offset = 0;
1345
1346         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1347                 ol_flags = PKT_RX_VLAN_PKT;
1348                 vlan_tci = pkt_buf->vlan_tci;
1349         } else {
1350                 ol_flags = 0;
1351                 vlan_tci = 0;
1352         }
1353
1354         for (i = 0; (i < count) && (buf != NULL); i++) {
1355                 /* fill each destination buffer */
1356                 m = mbufs[i];
1357
1358                 if (m_previous != NULL)
1359                         m_previous->next = m;
1360
1361                 m_previous = m;
1362
1363                 do {
1364                         /*
1365                          * Copy as many source buffers as will fit in the
1366                          * destination buffer.
1367                          */
1368                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1369                                                rte_pktmbuf_data_len(m)),
1370                                               (pkt_buf->data_len -
1371                                                src_offset));
1372                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1373                                                rte_pktmbuf_data_len(m)),
1374                                    RTE_PTR_ADD(pkt_data, src_offset),
1375                                    copy_length);
1376                         rte_pktmbuf_data_len(m) += copy_length;
1377                         src_offset += copy_length;
1378
1379                         if (likely(src_offset == pkt_buf->data_len)) {
1380                                 /* need a new source buffer */
1381                                 buf = pkt_buf->next;
1382                                 if (buf != NULL) {
1383                                         pkt_buf = avp_dev_translate_buffer(
1384                                                 avp, buf);
1385                                         pkt_data = avp_dev_translate_buffer(
1386                                                 avp, pkt_buf->data);
1387                                         src_offset = 0;
1388                                 }
1389                         }
1390
1391                         if (unlikely(rte_pktmbuf_data_len(m) ==
1392                                      avp->guest_mbuf_size)) {
1393                                 /* need a new destination mbuf */
1394                                 break;
1395                         }
1396
1397                 } while (buf != NULL);
1398         }
1399
1400         m = mbufs[0];
1401         m->ol_flags = ol_flags;
1402         m->nb_segs = count;
1403         rte_pktmbuf_pkt_len(m) = total_length;
1404         m->vlan_tci = vlan_tci;
1405
1406         __rte_mbuf_sanity_check(m, 1);
1407
1408         return m;
1409 }
1410
1411 static uint16_t
1412 avp_recv_scattered_pkts(void *rx_queue,
1413                         struct rte_mbuf **rx_pkts,
1414                         uint16_t nb_pkts)
1415 {
1416         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1417         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1418         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1419         struct avp_dev *avp = rxq->avp;
1420         struct rte_avp_desc *pkt_buf;
1421         struct rte_avp_fifo *free_q;
1422         struct rte_avp_fifo *rx_q;
1423         struct rte_avp_desc *buf;
1424         unsigned int count, avail, n;
1425         unsigned int guest_mbuf_size;
1426         struct rte_mbuf *m;
1427         unsigned int required;
1428         unsigned int buf_len;
1429         unsigned int port_id;
1430         unsigned int i;
1431
1432         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1433                 /* VM live migration in progress */
1434                 return 0;
1435         }
1436
1437         guest_mbuf_size = avp->guest_mbuf_size;
1438         port_id = avp->port_id;
1439         rx_q = avp->rx_q[rxq->queue_id];
1440         free_q = avp->free_q[rxq->queue_id];
1441
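             /*
              * This queue polls host queues queue_base..queue_limit in
              * round-robin order: the rx_q/free_q selected above are serviced
              * on this call and queue_id is advanced for the next one.
              */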
1442         /* setup next queue to service */
1443         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1444                 (rxq->queue_id + 1) : rxq->queue_base;
1445
1446         /* determine how many slots are available in the free queue */
1447         count = avp_fifo_free_count(free_q);
1448
1449         /* determine how many packets are available in the rx queue */
1450         avail = avp_fifo_count(rx_q);
1451
1452         /* determine how many packets can be received */
1453         count = RTE_MIN(count, avail);
1454         count = RTE_MIN(count, nb_pkts);
1455         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1456
1457         if (unlikely(count == 0)) {
1458                 /* no free buffers, or no buffers on the rx queue */
1459                 return 0;
1460         }
1461
1462         /* retrieve pending packets */
1463         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1464         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1465                    count, rx_q);
1466
1467         count = 0;
1468         for (i = 0; i < n; i++) {
1469                 /* prefetch next entry while processing current one */
1470                 if (i + 1 < n) {
1471                         pkt_buf = avp_dev_translate_buffer(avp,
1472                                                            avp_bufs[i + 1]);
1473                         rte_prefetch0(pkt_buf);
1474                 }
1475                 buf = avp_bufs[i];
1476
1477                 /* Peek into the first buffer to determine the total length */
1478                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1479                 buf_len = pkt_buf->pkt_len;
1480
1481                 /* Allocate enough mbufs to receive the entire packet */
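                     /*
                      * Ceiling division; e.g. a 5000 byte packet with 2048 byte
                      * mbufs needs (5000 + 2047) / 2048 = 3 mbufs (illustrative
                      * values only).  mbufs[] holds RTE_AVP_MAX_MBUF_SEGMENTS
                      * entries, so this assumes the host never chains more
                      * segments than that.
                      */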
1482                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1483                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1484                         rxq->dev_data->rx_mbuf_alloc_failed++;
1485                         continue;
1486                 }
1487
1488                 /* Copy the data from the buffers to our mbufs */
1489                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1490
1491                 /* finalize mbuf */
1492                 m->port = port_id;
1493
1494                 if (_avp_mac_filter(avp, m) != 0) {
1495                         /* silently discard packets not destined to our MAC */
1496                         rte_pktmbuf_free(m);
1497                         continue;
1498                 }
1499
1500                 /* return new mbuf to caller */
1501                 rx_pkts[count++] = m;
1502                 rxq->bytes += buf_len;
1503         }
1504
1505         rxq->packets += count;
1506
1507         /* return the buffers to the free queue */
1508         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1509
1510         return count;
1511 }
1512
1513
1514 static uint16_t
1515 avp_recv_pkts(void *rx_queue,
1516               struct rte_mbuf **rx_pkts,
1517               uint16_t nb_pkts)
1518 {
1519         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1520         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1521         struct avp_dev *avp = rxq->avp;
1522         struct rte_avp_desc *pkt_buf;
1523         struct rte_avp_fifo *free_q;
1524         struct rte_avp_fifo *rx_q;
1525         unsigned int count, avail, n;
1526         unsigned int pkt_len;
1527         struct rte_mbuf *m;
1528         char *pkt_data;
1529         unsigned int i;
1530
1531         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1532                 /* VM live migration in progress */
1533                 return 0;
1534         }
1535
1536         rx_q = avp->rx_q[rxq->queue_id];
1537         free_q = avp->free_q[rxq->queue_id];
1538
1539         /* setup next queue to service */
1540         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1541                 (rxq->queue_id + 1) : rxq->queue_base;
1542
1543         /* determine how many slots are available in the free queue */
1544         count = avp_fifo_free_count(free_q);
1545
1546         /* determine how many packets are available in the rx queue */
1547         avail = avp_fifo_count(rx_q);
1548
1549         /* determine how many packets can be received */
1550         count = RTE_MIN(count, avail);
1551         count = RTE_MIN(count, nb_pkts);
1552         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1553
1554         if (unlikely(count == 0)) {
1555                 /* no free buffers, or no buffers on the rx queue */
1556                 return 0;
1557         }
1558
1559         /* retrieve pending packets */
1560         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1561         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1562                    count, rx_q);
1563
1564         count = 0;
1565         for (i = 0; i < n; i++) {
1566                 /* prefetch next entry while processing current one */
1567                 if (i < n - 1) {
1568                         pkt_buf = avp_dev_translate_buffer(avp,
1569                                                            avp_bufs[i + 1]);
1570                         rte_prefetch0(pkt_buf);
1571                 }
1572
1573                 /* Adjust host pointers for guest addressing */
1574                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1575                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1576                 pkt_len = pkt_buf->pkt_len;
1577
1578                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1579                              (pkt_buf->nb_segs > 1))) {
1580                         /*
1581                          * application should be using the scattered receive
1582                          * function
1583                          */
1584                         rxq->errors++;
1585                         continue;
1586                 }
1587
1588                 /* allocate a new mbuf for the received packet */
1589                 m = rte_pktmbuf_alloc(avp->pool);
1590                 if (unlikely(m == NULL)) {
1591                         rxq->dev_data->rx_mbuf_alloc_failed++;
1592                         continue;
1593                 }
1594
1595                 /* copy data out of the host buffer to our buffer */
1596                 m->data_off = RTE_PKTMBUF_HEADROOM;
1597                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1598
1599                 /* initialize the local mbuf */
1600                 rte_pktmbuf_data_len(m) = pkt_len;
1601                 rte_pktmbuf_pkt_len(m) = pkt_len;
1602                 m->port = avp->port_id;
1603
1604                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1605                         m->ol_flags = PKT_RX_VLAN_PKT;
1606                         m->vlan_tci = pkt_buf->vlan_tci;
1607                 }
1608
1609                 if (_avp_mac_filter(avp, m) != 0) {
1610                         /* silently discard packets not destined to our MAC */
1611                         rte_pktmbuf_free(m);
1612                         continue;
1613                 }
1614
1615                 /* return new mbuf to caller */
1616                 rx_pkts[count++] = m;
1617                 rxq->bytes += pkt_len;
1618         }
1619
1620         rxq->packets += count;
1621
1622         /* return the buffers to the free queue */
1623         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1624
1625         return count;
1626 }
1627
1628 /*
1629  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1630  * there are sufficient destination buffers to contain the entire source
1631  * packet.
1632  */
1633 static inline uint16_t
1634 avp_dev_copy_to_buffers(struct avp_dev *avp,
1635                         struct rte_mbuf *mbuf,
1636                         struct rte_avp_desc **buffers,
1637                         unsigned int count)
1638 {
1639         struct rte_avp_desc *previous_buf = NULL;
1640         struct rte_avp_desc *first_buf = NULL;
1641         struct rte_avp_desc *pkt_buf;
1642         struct rte_avp_desc *buf;
1643         size_t total_length;
1644         struct rte_mbuf *m;
1645         size_t copy_length;
1646         size_t src_offset;
1647         char *pkt_data;
1648         unsigned int i;
1649
1650         __rte_mbuf_sanity_check(mbuf, 1);
1651
1652         m = mbuf;
1653         src_offset = 0;
1654         total_length = rte_pktmbuf_pkt_len(m);
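             /*
              * pkt_buf->data_len doubles as the write offset into the current
              * host buffer, while src_offset tracks how far into the current
              * mbuf segment the copy has progressed.
              */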
1655         for (i = 0; (i < count) && (m != NULL); i++) {
1656                 /* fill each destination buffer */
1657                 buf = buffers[i];
1658
1659                 if (i < count - 1) {
1660                         /* prefetch next entry while processing this one */
1661                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1662                         rte_prefetch0(pkt_buf);
1663                 }
1664
1665                 /* Adjust pointers for guest addressing */
1666                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1667                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1668
1669                 /* setup the buffer chain */
1670                 if (previous_buf != NULL)
1671                         previous_buf->next = buf;
1672                 else
1673                         first_buf = pkt_buf;
1674
1675                 previous_buf = pkt_buf;
1676
1677                 do {
1678                         /*
1679                          * copy as many source mbuf segments as will fit in the
1680                          * destination buffer.
1681                          */
1682                         copy_length = RTE_MIN((avp->host_mbuf_size -
1683                                                pkt_buf->data_len),
1684                                               (rte_pktmbuf_data_len(m) -
1685                                                src_offset));
1686                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1687                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1688                                                src_offset),
1689                                    copy_length);
1690                         pkt_buf->data_len += copy_length;
1691                         src_offset += copy_length;
1692
1693                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1694                                 /* need a new source buffer */
1695                                 m = m->next;
1696                                 src_offset = 0;
1697                         }
1698
1699                         if (unlikely(pkt_buf->data_len ==
1700                                      avp->host_mbuf_size)) {
1701                                 /* need a new destination buffer */
1702                                 break;
1703                         }
1704
1705                 } while (m != NULL);
1706         }
1707
1708         first_buf->nb_segs = count;
1709         first_buf->pkt_len = total_length;
1710
1711         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1712                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1713                 first_buf->vlan_tci = mbuf->vlan_tci;
1714         }
1715
1716         avp_dev_buffer_sanity_check(avp, buffers[0]);
1717
1718         return total_length;
1719 }
1720
1721
1722 static uint16_t
1723 avp_xmit_scattered_pkts(void *tx_queue,
1724                         struct rte_mbuf **tx_pkts,
1725                         uint16_t nb_pkts)
1726 {
1727         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1728                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1729         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1730         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1731         struct avp_dev *avp = txq->avp;
1732         struct rte_avp_fifo *alloc_q;
1733         struct rte_avp_fifo *tx_q;
1734         unsigned int count, avail, n;
1735         unsigned int orig_nb_pkts;
1736         struct rte_mbuf *m;
1737         unsigned int required;
1738         unsigned int segments;
1739         unsigned int tx_bytes;
1740         unsigned int i;
1741
1742         orig_nb_pkts = nb_pkts;
1743         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1744                 /* VM live migration in progress */
1745                 /* TODO ... buffer for X packets then drop? */
1746                 txq->errors += nb_pkts;
1747                 return 0;
1748         }
1749
1750         tx_q = avp->tx_q[txq->queue_id];
1751         alloc_q = avp->alloc_q[txq->queue_id];
1752
1753         /* limit the number of transmitted packets to the max burst size */
1754         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1755                 nb_pkts = AVP_MAX_TX_BURST;
1756
1757         /* determine how many buffers are available to copy into */
1758         avail = avp_fifo_count(alloc_q);
1759         if (unlikely(avail > (AVP_MAX_TX_BURST *
1760                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1761                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1762
1763         /* determine how many slots are available in the transmit queue */
1764         count = avp_fifo_free_count(tx_q);
1765
1766         /* determine how many packets can be sent */
1767         nb_pkts = RTE_MIN(count, nb_pkts);
1768
1769         /* determine how many packets will fit in the available buffers */
1770         count = 0;
1771         segments = 0;
1772         for (i = 0; i < nb_pkts; i++) {
1773                 m = tx_pkts[i];
1774                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1775                         /* prefetch next entry while processing this one */
1776                         rte_prefetch0(tx_pkts[i + 1]);
1777                 }
1778                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1779                         avp->host_mbuf_size;
1780
1781                 if (unlikely((required == 0) ||
1782                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1783                         break;
1784                 else if (unlikely(required + segments > avail))
1785                         break;
1786                 segments += required;
1787                 count++;
1788         }
1789         nb_pkts = count;
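             /*
              * At this point nb_pkts counts only whole packets whose segments
              * fit in both the free tx slots and the available host buffers;
              * packets are never queued partially.
              */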
1790
1791         if (unlikely(nb_pkts == 0)) {
1792                 /* no available buffers, or no space on the tx queue */
1793                 txq->errors += orig_nb_pkts;
1794                 return 0;
1795         }
1796
1797         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1798                    nb_pkts, tx_q);
1799
1800         /* retrieve sufficient send buffers */
1801         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1802         if (unlikely(n != segments)) {
1803                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1804                            "n=%u, segments=%u, orig=%u\n",
1805                            n, segments, orig_nb_pkts);
1806                 txq->errors += orig_nb_pkts;
1807                 return 0;
1808         }
1809
1810         tx_bytes = 0;
1811         count = 0;
1812         for (i = 0; i < nb_pkts; i++) {
1813                 /* process each packet to be transmitted */
1814                 m = tx_pkts[i];
1815
1816                 /* determine how many buffers are required for this packet */
1817                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1818                         avp->host_mbuf_size;
1819
1820                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1821                                                     &avp_bufs[count], required);
1822                 tx_bufs[i] = avp_bufs[count];
1823                 count += required;
1824
1825                 /* free the original mbuf */
1826                 rte_pktmbuf_free(m);
1827         }
1828
1829         txq->packets += nb_pkts;
1830         txq->bytes += tx_bytes;
1831
1832 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1833         for (i = 0; i < nb_pkts; i++)
1834                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1835 #endif
1836
1837         /* send the packets */
1838         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1839         if (unlikely(n != orig_nb_pkts))
1840                 txq->errors += (orig_nb_pkts - n);
1841
1842         return n;
1843 }
1844
1845
1846 static uint16_t
1847 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1848 {
1849         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1850         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1851         struct avp_dev *avp = txq->avp;
1852         struct rte_avp_desc *pkt_buf;
1853         struct rte_avp_fifo *alloc_q;
1854         struct rte_avp_fifo *tx_q;
1855         unsigned int count, avail, n;
1856         struct rte_mbuf *m;
1857         unsigned int pkt_len;
1858         unsigned int tx_bytes;
1859         char *pkt_data;
1860         unsigned int i;
1861
1862         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1863                 /* VM live migration in progress */
1864                 /* TODO ... buffer for X packets then drop?! */
1865                 txq->errors++;
1866                 return 0;
1867         }
1868
1869         tx_q = avp->tx_q[txq->queue_id];
1870         alloc_q = avp->alloc_q[txq->queue_id];
1871
1872         /* limit the number of transmitted packets to the max burst size */
1873         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1874                 nb_pkts = AVP_MAX_TX_BURST;
1875
1876         /* determine how many buffers are available to copy into */
1877         avail = avp_fifo_count(alloc_q);
1878
1879         /* determine how many slots are available in the transmit queue */
1880         count = avp_fifo_free_count(tx_q);
1881
1882         /* determine how many packets can be sent */
1883         count = RTE_MIN(count, avail);
1884         count = RTE_MIN(count, nb_pkts);
1885
1886         if (unlikely(count == 0)) {
1887                 /* no available buffers, or no space on the tx queue */
1888                 txq->errors += nb_pkts;
1889                 return 0;
1890         }
1891
1892         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1893                    count, tx_q);
1894
1895         /* retrieve sufficient send buffers */
1896         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1897         if (unlikely(n != count)) {
1898                 txq->errors++;
1899                 return 0;
1900         }
1901
1902         tx_bytes = 0;
1903         for (i = 0; i < count; i++) {
1904                 /* prefetch next entry while processing the current one */
1905                 if (i < count - 1) {
1906                         pkt_buf = avp_dev_translate_buffer(avp,
1907                                                            avp_bufs[i + 1]);
1908                         rte_prefetch0(pkt_buf);
1909                 }
1910
1911                 /* process each packet to be transmitted */
1912                 m = tx_pkts[i];
1913
1914                 /* Adjust pointers for guest addressing */
1915                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1916                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1917                 pkt_len = rte_pktmbuf_pkt_len(m);
1918
1919                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1920                                          (pkt_len > avp->host_mbuf_size))) {
1921                         /*
1922                          * application should be using the scattered transmit
1923                          * function; send it truncated to avoid the performance
1924                          * hit of having to manage returning the already
1925                          * allocated buffer to the free list.  This should not
1926                          * happen since the application should have set the
1927                          * max_rx_pkt_len based on its MTU and it should be
1928                          * policing its own packet sizes.
1929                          */
1930                         txq->errors++;
1931                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1932                                           avp->host_mbuf_size);
1933                 }
1934
1935                 /* copy data out of our mbuf and into the AVP buffer */
1936                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1937                 pkt_buf->pkt_len = pkt_len;
1938                 pkt_buf->data_len = pkt_len;
1939                 pkt_buf->nb_segs = 1;
1940                 pkt_buf->next = NULL;
1941
1942                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1943                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1944                         pkt_buf->vlan_tci = m->vlan_tci;
1945                 }
1946
1947                 tx_bytes += pkt_len;
1948
1949                 /* free the original mbuf */
1950                 rte_pktmbuf_free(m);
1951         }
1952
1953         txq->packets += count;
1954         txq->bytes += tx_bytes;
1955
1956         /* send the packets */
1957         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1958
1959         return n;
1960 }
1961
1962 static void
1963 avp_dev_rx_queue_release(void *rx_queue)
1964 {
1965         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1966         struct avp_dev *avp = rxq->avp;
1967         struct rte_eth_dev_data *data = avp->dev_data;
1968         unsigned int i;
1969
1970         for (i = 0; i < avp->num_rx_queues; i++) {
1971                 if (data->rx_queues[i] == rxq)
1972                         data->rx_queues[i] = NULL;
1973         }
1974 }
1975
1976 static void
1977 avp_dev_tx_queue_release(void *tx_queue)
1978 {
1979         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1980         struct avp_dev *avp = txq->avp;
1981         struct rte_eth_dev_data *data = avp->dev_data;
1982         unsigned int i;
1983
1984         for (i = 0; i < avp->num_tx_queues; i++) {
1985                 if (data->tx_queues[i] == txq)
1986                         data->tx_queues[i] = NULL;
1987         }
1988 }
1989
1990 static int
1991 avp_dev_configure(struct rte_eth_dev *eth_dev)
1992 {
1993         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
1994         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1995         struct rte_avp_device_info *host_info;
1996         struct rte_avp_device_config config;
1997         int mask = 0;
1998         void *addr;
1999         int ret;
2000
2001         rte_spinlock_lock(&avp->lock);
2002         if (avp->flags & AVP_F_DETACHED) {
2003                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2004                 ret = -ENOTSUP;
2005                 goto unlock;
2006         }
2007
2008         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2009         host_info = (struct rte_avp_device_info *)addr;
2010
2011         /* Setup required number of queues */
2012         _avp_set_queue_counts(eth_dev);
2013
2014         mask = (ETH_VLAN_STRIP_MASK |
2015                 ETH_VLAN_FILTER_MASK |
2016                 ETH_VLAN_EXTEND_MASK);
2017         avp_vlan_offload_set(eth_dev, mask);
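             /*
              * avp_vlan_offload_set() has updated avp->features to reflect the
              * requested rxmode settings; the result is reported to the host
              * via config.features below.
              */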
2018
2019         /* update device config */
2020         memset(&config, 0, sizeof(config));
2021         config.device_id = host_info->device_id;
2022         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2023         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2024         config.features = avp->features;
2025         config.num_tx_queues = avp->num_tx_queues;
2026         config.num_rx_queues = avp->num_rx_queues;
2027
2028         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2029         if (ret < 0) {
2030                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2031                             ret);
2032                 goto unlock;
2033         }
2034
2035         avp->flags |= AVP_F_CONFIGURED;
2036         ret = 0;
2037
2038 unlock:
2039         rte_spinlock_unlock(&avp->lock);
2040         return ret;
2041 }
2042
2043 static int
2044 avp_dev_start(struct rte_eth_dev *eth_dev)
2045 {
2046         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2047         int ret;
2048
2049         rte_spinlock_lock(&avp->lock);
2050         if (avp->flags & AVP_F_DETACHED) {
2051                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2052                 ret = -ENOTSUP;
2053                 goto unlock;
2054         }
2055
2056         /* disable features that we do not support */
2057         eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2058         eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2059         eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2060         eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2061
2062         /* update link state */
2063         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2064         if (ret < 0) {
2065                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2066                             ret);
2067                 goto unlock;
2068         }
2069
2070         /* remember current link state */
2071         avp->flags |= AVP_F_LINKUP;
2072
2073         ret = 0;
2074
2075 unlock:
2076         rte_spinlock_unlock(&avp->lock);
2077         return ret;
2078 }
2079
2080 static void
2081 avp_dev_stop(struct rte_eth_dev *eth_dev)
2082 {
2083         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2084         int ret;
2085
2086         rte_spinlock_lock(&avp->lock);
2087         if (avp->flags & AVP_F_DETACHED) {
2088                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2089                 goto unlock;
2090         }
2091
2092         /* remember current link state */
2093         avp->flags &= ~AVP_F_LINKUP;
2094
2095         /* update link state */
2096         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2097         if (ret < 0) {
2098                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2099                             ret);
2100         }
2101
2102 unlock:
2103         rte_spinlock_unlock(&avp->lock);
2104 }
2105
2106 static void
2107 avp_dev_close(struct rte_eth_dev *eth_dev)
2108 {
2109         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2110         int ret;
2111
2112         rte_spinlock_lock(&avp->lock);
2113         if (avp->flags & AVP_F_DETACHED) {
2114                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2115                 goto unlock;
2116         }
2117
2118         /* remember current link state */
2119         avp->flags &= ~AVP_F_LINKUP;
2120         avp->flags &= ~AVP_F_CONFIGURED;
2121
2122         ret = avp_dev_disable_interrupts(eth_dev);
2123         if (ret < 0) {
2124                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2125                 /* continue */
2126         }
2127
2128         /* update device state */
2129         ret = avp_dev_ctrl_shutdown(eth_dev);
2130         if (ret < 0) {
2131                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2132                             ret);
2133                 /* continue */
2134         }
2135
2136 unlock:
2137         rte_spinlock_unlock(&avp->lock);
2138 }
2139
2140 static int
2141 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2142                                         __rte_unused int wait_to_complete)
2143 {
2144         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2145         struct rte_eth_link *link = &eth_dev->data->dev_link;
2146
2147         link->link_speed = ETH_SPEED_NUM_10G;
2148         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2149         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2150
2151         return -1;
2152 }
2153
2154 static void
2155 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2156 {
2157         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2158
2159         rte_spinlock_lock(&avp->lock);
2160         if ((avp->flags & AVP_F_PROMISC) == 0) {
2161                 avp->flags |= AVP_F_PROMISC;
2162                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2163                             eth_dev->data->port_id);
2164         }
2165         rte_spinlock_unlock(&avp->lock);
2166 }
2167
2168 static void
2169 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2170 {
2171         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2172
2173         rte_spinlock_lock(&avp->lock);
2174         if ((avp->flags & AVP_F_PROMISC) != 0) {
2175                 avp->flags &= ~AVP_F_PROMISC;
2176                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2177                             eth_dev->data->port_id);
2178         }
2179         rte_spinlock_unlock(&avp->lock);
2180 }
2181
2182 static void
2183 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2184                  struct rte_eth_dev_info *dev_info)
2185 {
2186         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2187
2188         dev_info->driver_name = "rte_avp_pmd";
2189         dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2190         dev_info->max_rx_queues = avp->max_rx_queues;
2191         dev_info->max_tx_queues = avp->max_tx_queues;
2192         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2193         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2194         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2195         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2196                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2197                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2198         }
2199 }
2200
2201 static void
2202 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2203 {
2204         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2205
2206         if (mask & ETH_VLAN_STRIP_MASK) {
2207                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2208                         if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2209                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2210                         else
2211                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2212                 } else {
2213                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2214                 }
2215         }
2216
2217         if (mask & ETH_VLAN_FILTER_MASK) {
2218                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2219                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2220         }
2221
2222         if (mask & ETH_VLAN_EXTEND_MASK) {
2223                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2224                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2225         }
2226 }
2227
2228 static void
2229 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2230 {
2231         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2232         unsigned int i;
2233
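             /*
              * Per-queue counters are accumulated without a bounds check; this
              * assumes the queue count never exceeds
              * RTE_ETHDEV_QUEUE_STAT_CNTRS.
              */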
2234         for (i = 0; i < avp->num_rx_queues; i++) {
2235                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2236
2237                 if (rxq) {
2238                         stats->ipackets += rxq->packets;
2239                         stats->ibytes += rxq->bytes;
2240                         stats->ierrors += rxq->errors;
2241
2242                         stats->q_ipackets[i] += rxq->packets;
2243                         stats->q_ibytes[i] += rxq->bytes;
2244                         stats->q_errors[i] += rxq->errors;
2245                 }
2246         }
2247
2248         for (i = 0; i < avp->num_tx_queues; i++) {
2249                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2250
2251                 if (txq) {
2252                         stats->opackets += txq->packets;
2253                         stats->obytes += txq->bytes;
2254                         stats->oerrors += txq->errors;
2255
2256                         stats->q_opackets[i] += txq->packets;
2257                         stats->q_obytes[i] += txq->bytes;
2258                         stats->q_errors[i] += txq->errors;
2259                 }
2260         }
2261 }
2262
2263 static void
2264 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2265 {
2266         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2267         unsigned int i;
2268
2269         for (i = 0; i < avp->num_rx_queues; i++) {
2270                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2271
2272                 if (rxq) {
2273                         rxq->bytes = 0;
2274                         rxq->packets = 0;
2275                         rxq->errors = 0;
2276                 }
2277         }
2278
2279         for (i = 0; i < avp->num_tx_queues; i++) {
2280                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2281
2282                 if (txq) {
2283                         txq->bytes = 0;
2284                         txq->packets = 0;
2285                         txq->errors = 0;
2286                 }
2287         }
2288 }
2289
2290 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv);
2291 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);