net/avp: handle interrupt migration
[dpdk.git] / drivers / net / avp / avp_ethdev.c
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38
39 #include <rte_ethdev.h>
40 #include <rte_memcpy.h>
41 #include <rte_string_fns.h>
42 #include <rte_memzone.h>
43 #include <rte_malloc.h>
44 #include <rte_atomic.h>
45 #include <rte_branch_prediction.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_common.h>
49 #include <rte_cycles.h>
50 #include <rte_spinlock.h>
51 #include <rte_byteorder.h>
52 #include <rte_dev.h>
53 #include <rte_memory.h>
54 #include <rte_eal.h>
55 #include <rte_io.h>
56
57 #include "rte_avp_common.h"
58 #include "rte_avp_fifo.h"
59
60 #include "avp_logs.h"
61
62
63 static int avp_dev_create(struct rte_pci_device *pci_dev,
64                           struct rte_eth_dev *eth_dev);
65
66 static int avp_dev_configure(struct rte_eth_dev *dev);
67 static int avp_dev_start(struct rte_eth_dev *dev);
68 static void avp_dev_stop(struct rte_eth_dev *dev);
69 static void avp_dev_close(struct rte_eth_dev *dev);
70 static void avp_dev_info_get(struct rte_eth_dev *dev,
71                              struct rte_eth_dev_info *dev_info);
72 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
73 static int avp_dev_link_update(struct rte_eth_dev *dev,
74                                __rte_unused int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79                                   uint16_t rx_queue_id,
80                                   uint16_t nb_rx_desc,
81                                   unsigned int socket_id,
82                                   const struct rte_eth_rxconf *rx_conf,
83                                   struct rte_mempool *pool);
84
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86                                   uint16_t tx_queue_id,
87                                   uint16_t nb_tx_desc,
88                                   unsigned int socket_id,
89                                   const struct rte_eth_txconf *tx_conf);
90
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92                                         struct rte_mbuf **rx_pkts,
93                                         uint16_t nb_pkts);
94
95 static uint16_t avp_recv_pkts(void *rx_queue,
96                               struct rte_mbuf **rx_pkts,
97                               uint16_t nb_pkts);
98
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100                                         struct rte_mbuf **tx_pkts,
101                                         uint16_t nb_pkts);
102
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104                               struct rte_mbuf **tx_pkts,
105                               uint16_t nb_pkts);
106
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109
110 static void avp_dev_stats_get(struct rte_eth_dev *dev,
111                               struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113
114
115 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
116
117
118 #define AVP_MAX_RX_BURST 64
119 #define AVP_MAX_TX_BURST 64
120 #define AVP_MAX_MAC_ADDRS 1
121 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
122
123
124 /*
125  * Defines the number of microseconds to wait before checking the response
126  * queue for completion.
127  */
128 #define AVP_REQUEST_DELAY_USECS (5000)
129
130 /*
131  * Defines the number of times to check the response queue for completion before
132  * declaring a timeout.
133  */
134 #define AVP_MAX_REQUEST_RETRY (100)
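
/*
 * Together these two values bound the worst-case wait for a host response:
 * AVP_MAX_REQUEST_RETRY * AVP_REQUEST_DELAY_USECS (100 * 5000us, roughly
 * 500ms) before avp_dev_process_request() gives up with -ETIME.
 */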
135
136 /* Defines the current PCI driver version number */
137 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
138
139 /*
140  * The set of PCI devices this driver supports
141  */
142 static const struct rte_pci_id pci_id_avp_map[] = {
143         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
144           .device_id = RTE_AVP_PCI_DEVICE_ID,
145           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
146           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
147           .class_id = RTE_CLASS_ANY_ID,
148         },
149
150         { .vendor_id = 0, /* sentinel */
151         },
152 };
153
154 /*
155  * dev_ops for avp, bare necessities for basic operation
156  */
157 static const struct eth_dev_ops avp_eth_dev_ops = {
158         .dev_configure       = avp_dev_configure,
159         .dev_start           = avp_dev_start,
160         .dev_stop            = avp_dev_stop,
161         .dev_close           = avp_dev_close,
162         .dev_infos_get       = avp_dev_info_get,
163         .vlan_offload_set    = avp_vlan_offload_set,
164         .stats_get           = avp_dev_stats_get,
165         .stats_reset         = avp_dev_stats_reset,
166         .link_update         = avp_dev_link_update,
167         .promiscuous_enable  = avp_dev_promiscuous_enable,
168         .promiscuous_disable = avp_dev_promiscuous_disable,
169         .rx_queue_setup      = avp_dev_rx_queue_setup,
170         .rx_queue_release    = avp_dev_rx_queue_release,
171         .tx_queue_setup      = avp_dev_tx_queue_setup,
172         .tx_queue_release    = avp_dev_tx_queue_release,
173 };
174
175 /**@{ AVP device flags */
176 #define AVP_F_PROMISC (1 << 1)
177 #define AVP_F_CONFIGURED (1 << 2)
178 #define AVP_F_LINKUP (1 << 3)
179 #define AVP_F_DETACHED (1 << 4)
180 /**@} */
181
182 /* Ethernet device validation marker */
183 #define AVP_ETHDEV_MAGIC 0x92972862
184
185 /*
186  * Defines the AVP device attributes which are attached to an RTE ethernet
187  * device
188  */
189 struct avp_dev {
190         uint32_t magic; /**< Memory validation marker */
191         uint64_t device_id; /**< Unique system identifier */
192         struct ether_addr ethaddr; /**< Host specified MAC address */
193         struct rte_eth_dev_data *dev_data;
194         /**< Back pointer to ethernet device data */
195         volatile uint32_t flags; /**< Device operational flags */
196         uint8_t port_id; /**< Ethernet port identifier */
197         struct rte_mempool *pool; /**< pkt mbuf mempool */
198         unsigned int guest_mbuf_size; /**< local pool mbuf size */
199         unsigned int host_mbuf_size; /**< host mbuf size */
200         unsigned int max_rx_pkt_len; /**< maximum receive unit */
201         uint32_t host_features; /**< Supported feature bitmap */
202         uint32_t features; /**< Enabled feature bitmap */
203         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
204         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
205         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
206         unsigned int max_rx_queues; /**< Maximum number of receive queues */
207
208         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
209         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
210         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
211         /**< Allocated mbufs queue */
212         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
213         /**< To be freed mbufs queue */
214
215         /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
216         rte_spinlock_t lock;
217
218         /* For request & response */
219         struct rte_avp_fifo *req_q; /**< Request queue */
220         struct rte_avp_fifo *resp_q; /**< Response queue */
221         void *host_sync_addr; /**< (host) Req/Resp Mem address */
222         void *sync_addr; /**< Req/Resp Mem address */
223         void *host_mbuf_addr; /**< (host) MBUF pool start address */
224         void *mbuf_addr; /**< MBUF pool start address */
225 } __rte_cache_aligned;
226
227 /* RTE ethernet private data */
228 struct avp_adapter {
229         struct avp_dev avp;
230 } __rte_cache_aligned;
231
232
233 /* 32-bit MMIO register write */
234 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
235
236 /* 32-bit MMIO register read */
237 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
238
239 /* Macro to cast the ethernet device private data to an AVP object */
240 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
241         (&((struct avp_adapter *)adapter)->avp)
242
243 /*
244  * Defines the structure of an AVP device queue for the purpose of handling the
245  * receive and transmit burst callback functions
246  */
247 struct avp_queue {
248         struct rte_eth_dev_data *dev_data;
249         /**< Backpointer to ethernet device data */
250         struct avp_dev *avp; /**< Backpointer to AVP device */
251         uint16_t queue_id;
252         /**< Queue identifier used for indexing current queue */
253         uint16_t queue_base;
254         /**< Base queue identifier for queue servicing */
255         uint16_t queue_limit;
256         /**< Maximum queue identifier for queue servicing */
257
258         uint64_t packets;
259         uint64_t bytes;
260         uint64_t errors;
261 };
262
263 /* send a request and wait for a response
264  *
265  * @warning must be called while holding the avp->lock spinlock.
266  */
267 static int
268 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
269 {
270         unsigned int retry = AVP_MAX_REQUEST_RETRY;
271         void *resp_addr = NULL;
272         unsigned int count;
273         int ret;
274
275         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
276
277         request->result = -ENOTSUP;
278
279         /* Discard any stale responses before starting a new request */
280         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
281                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
282
283         rte_memcpy(avp->sync_addr, request, sizeof(*request));
284         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
285         if (count < 1) {
286                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
287                             request->req_id);
288                 ret = -EBUSY;
289                 goto done;
290         }
291
292         while (retry--) {
293                 /* wait for a response */
294                 usleep(AVP_REQUEST_DELAY_USECS);
295
296                 count = avp_fifo_count(avp->resp_q);
297                 if (count >= 1) {
298                         /* response received */
299                         break;
300                 }
301
302                 if ((count < 1) && (retry == 0)) {
303                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
304                                     request->req_id);
305                         ret = -ETIME;
306                         goto done;
307                 }
308         }
309
310         /* retrieve the response */
311         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
312         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
313                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
314                             count, resp_addr, avp->host_sync_addr);
315                 ret = -ENODATA;
316                 goto done;
317         }
318
319         /* copy to user buffer */
320         rte_memcpy(request, avp->sync_addr, sizeof(*request));
321         ret = 0;
322
323         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
324                     request->result, request->req_id);
325
326 done:
327         return ret;
328 }
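
/*
 * Illustrative call pattern (sketch only): a caller holds avp->lock across
 * the exchange and, on success, reads the host status from request.result,
 * e.g.
 *
 *	struct rte_avp_request request;
 *	int ret;
 *
 *	rte_spinlock_lock(&avp->lock);
 *	memset(&request, 0, sizeof(request));
 *	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
 *	request.if_up = 1;
 *	ret = avp_dev_process_request(avp, &request);
 *	rte_spinlock_unlock(&avp->lock);
 *	ret = (ret == 0) ? request.result : ret;
 */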
329
330 static int
331 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
332 {
333         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
334         struct rte_avp_request request;
335         int ret;
336
337         /* setup a link state change request */
338         memset(&request, 0, sizeof(request));
339         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
340         request.if_up = state;
341
342         ret = avp_dev_process_request(avp, &request);
343
344         return ret == 0 ? request.result : ret;
345 }
346
347 static int
348 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
349                         struct rte_avp_device_config *config)
350 {
351         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
352         struct rte_avp_request request;
353         int ret;
354
355         /* setup a configure request */
356         memset(&request, 0, sizeof(request));
357         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
358         memcpy(&request.config, config, sizeof(request.config));
359
360         ret = avp_dev_process_request(avp, &request);
361
362         return ret == 0 ? request.result : ret;
363 }
364
365 static int
366 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
367 {
368         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
369         struct rte_avp_request request;
370         int ret;
371
372         /* setup a shutdown request */
373         memset(&request, 0, sizeof(request));
374         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
375
376         ret = avp_dev_process_request(avp, &request);
377
378         return ret == 0 ? request.result : ret;
379 }
380
381 /* translate from host mbuf virtual address to guest virtual address */
382 static inline void *
383 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
384 {
385         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
386                                        (uintptr_t)avp->host_mbuf_addr),
387                            (uintptr_t)avp->mbuf_addr);
388 }
389
390 /* translate from host physical address to guest virtual address */
391 static void *
392 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
393                           phys_addr_t host_phys_addr)
394 {
395         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
396         struct rte_mem_resource *resource;
397         struct rte_avp_memmap_info *info;
398         struct rte_avp_memmap *map;
399         off_t offset;
400         void *addr;
401         unsigned int i;
402
403         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
404         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
405         info = (struct rte_avp_memmap_info *)resource->addr;
406
407         offset = 0;
408         for (i = 0; i < info->nb_maps; i++) {
409                 /* search all segments looking for a matching address */
410                 map = &info->maps[i];
411
412                 if ((host_phys_addr >= map->phys_addr) &&
413                         (host_phys_addr < (map->phys_addr + map->length))) {
414                         /* address is within this segment */
415                         offset += (host_phys_addr - map->phys_addr);
416                         addr = RTE_PTR_ADD(addr, offset);
417
418                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
419                                     host_phys_addr, addr);
420
421                         return addr;
422                 }
423                 offset += map->length;
424         }
425
426         return NULL;
427 }
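
/*
 * Illustrative example (hypothetical segment sizes): if the first memmap
 * segment is 0x200000 bytes long and the second segment starts at host
 * physical address 0x40000000, then host physical 0x40000800 translates to
 * the RTE_AVP_PCI_MEMORY_BAR base address plus 0x200000 + 0x800.
 */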
428
429 /* verify that the incoming device version is compatible with our version */
430 static int
431 avp_dev_version_check(uint32_t version)
432 {
433         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
434         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
435
436         if (device <= driver) {
437                 /* the host driver version is less than or equal to ours */
438                 return 0;
439         }
440
441         return 1;
442 }
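
/*
 * Note (descriptive): only the version with the minor number stripped is
 * compared, so a host that differs solely in its minor version is accepted,
 * while a host whose stripped version is newer than AVP_DPDK_DRIVER_VERSION
 * is rejected.
 */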
443
444 /* verify that memory regions have expected version and validation markers */
445 static int
446 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
447 {
448         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
449         struct rte_avp_memmap_info *memmap;
450         struct rte_avp_device_info *info;
451         struct rte_mem_resource *resource;
452         unsigned int i;
453
454         /* Dump resource info for debug */
455         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
456                 resource = &pci_dev->mem_resource[i];
457                 if ((resource->phys_addr == 0) || (resource->len == 0))
458                         continue;
459
460                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
461                             i, resource->phys_addr,
462                             resource->len, resource->addr);
463
464                 switch (i) {
465                 case RTE_AVP_PCI_MEMMAP_BAR:
466                         memmap = (struct rte_avp_memmap_info *)resource->addr;
467                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
468                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
469                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
470                                             memmap->magic, memmap->version);
471                                 return -EINVAL;
472                         }
473                         break;
474
475                 case RTE_AVP_PCI_DEVICE_BAR:
476                         info = (struct rte_avp_device_info *)resource->addr;
477                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
478                             avp_dev_version_check(info->version)) {
479                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
480                                             info->magic, info->version,
481                                             AVP_DPDK_DRIVER_VERSION);
482                                 return -EINVAL;
483                         }
484                         break;
485
486                 case RTE_AVP_PCI_MEMORY_BAR:
487                 case RTE_AVP_PCI_MMIO_BAR:
488                         if (resource->addr == NULL) {
489                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
490                                             i);
491                                 return -EINVAL;
492                         }
493                         break;
494
495                 case RTE_AVP_PCI_MSIX_BAR:
496                 default:
497                         /* no validation required */
498                         break;
499                 }
500         }
501
502         return 0;
503 }
504
505 static int
506 avp_dev_detach(struct rte_eth_dev *eth_dev)
507 {
508         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
509         int ret;
510
511         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
512                     eth_dev->data->port_id, avp->device_id);
513
514         rte_spinlock_lock(&avp->lock);
515
516         if (avp->flags & AVP_F_DETACHED) {
517                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
518                             eth_dev->data->port_id);
519                 ret = 0;
520                 goto unlock;
521         }
522
523         /* shutdown the device first so the host stops sending us packets. */
524         ret = avp_dev_ctrl_shutdown(eth_dev);
525         if (ret < 0) {
526                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
527                             ret);
528                 avp->flags &= ~AVP_F_DETACHED;
529                 goto unlock;
530         }
531
532         avp->flags |= AVP_F_DETACHED;
533         rte_wmb();
534
535         /* wait for queues to acknowledge the presence of the detach flag */
536         rte_delay_ms(1);
537
538         ret = 0;
539
540 unlock:
541         rte_spinlock_unlock(&avp->lock);
542         return ret;
543 }
544
545 static void
546 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
547 {
548         struct avp_dev *avp =
549                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
550         struct avp_queue *rxq;
551         uint16_t queue_count;
552         uint16_t remainder;
553
554         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
555
556         /*
557          * Must map all AVP fifos as evenly as possible between the configured
558          * device queues.  Each device queue will service a subset of the AVP
559          * fifos. If the AVP fifos do not divide evenly among the device queues,
560          * the first device queues will each service one extra AVP fifo.
561          */
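        /*
         * Worked example (illustrative values): with 5 AVP fifos spread over
         * 2 configured rx queues, queue_count = 2 and remainder = 1, so
         * device queue 0 services fifos 0-2 and device queue 1 services
         * fifos 3-4.
         */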
562         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
563         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
564         if (rx_queue_id < remainder) {
565                 /* these queues must service one extra FIFO */
566                 rxq->queue_base = rx_queue_id * (queue_count + 1);
567                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
568         } else {
569                 /* these queues service the regular number of FIFOs */
570                 rxq->queue_base = ((remainder * (queue_count + 1)) +
571                                    ((rx_queue_id - remainder) * queue_count));
572                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
573         }
574
575         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
576                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
577
578         rxq->queue_id = rxq->queue_base;
579 }
580
581 static void
582 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
583 {
584         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
585         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
586         struct rte_avp_device_info *host_info;
587         void *addr;
588
589         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
590         host_info = (struct rte_avp_device_info *)addr;
591
592         /*
593          * the transmit direction is not negotiated beyond respecting the max
594          * number of queues because the host can handle arbitrary guest tx
595          * queues (host rx queues).
596          */
597         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
598
599         /*
600          * the receive direction is more restrictive.  The host requires a
601          * minimum number of guest rx queues (host tx queues), so negotiate
602          * a value that is at least as large as the host minimum
603          * requirement.  If the host and guest values are not identical then a
604          * mapping will be established in the receive_queue_setup function.
605          */
606         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
607                                      eth_dev->data->nb_rx_queues);
608
609         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
610                     avp->num_tx_queues, avp->num_rx_queues);
611 }
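
/*
 * Example (illustrative values): if the host advertises min_rx_queues = 4
 * and the application configures 2 rx queues, num_rx_queues becomes 4 and
 * _avp_set_rx_queue_mappings() spreads the 4 AVP fifos across the 2 device
 * queues, 2 fifos each.
 */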
612
613 static int
614 avp_dev_attach(struct rte_eth_dev *eth_dev)
615 {
616         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
617         struct rte_avp_device_config config;
618         unsigned int i;
619         int ret;
620
621         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
622                     eth_dev->data->port_id, avp->device_id);
623
624         rte_spinlock_lock(&avp->lock);
625
626         if (!(avp->flags & AVP_F_DETACHED)) {
627                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
628                             eth_dev->data->port_id);
629                 ret = 0;
630                 goto unlock;
631         }
632
633         /*
634          * make sure that the detached flag is set prior to reconfiguring the
635          * queues.
636          */
637         avp->flags |= AVP_F_DETACHED;
638         rte_wmb();
639
640         /*
641          * re-run the device create utility which will parse the new host info
642          * and setup the AVP device queue pointers.
643          */
644         ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
645         if (ret < 0) {
646                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
647                             ret);
648                 goto unlock;
649         }
650
651         if (avp->flags & AVP_F_CONFIGURED) {
652                 /*
653                  * Update the receive queue mapping to handle cases where the
654                  * source and destination hosts have different queue
655                  * requirements.  As long as the DETACHED flag is asserted the
656                  * queue table should not be referenced so it should be safe to
657                  * update it.
658                  */
659                 _avp_set_queue_counts(eth_dev);
660                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
661                         _avp_set_rx_queue_mappings(eth_dev, i);
662
663                 /*
664                  * Update the host with our config details so that it knows the
665                  * device is active.
666                  */
667                 memset(&config, 0, sizeof(config));
668                 config.device_id = avp->device_id;
669                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
670                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
671                 config.features = avp->features;
672                 config.num_tx_queues = avp->num_tx_queues;
673                 config.num_rx_queues = avp->num_rx_queues;
674                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
675
676                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
677                 if (ret < 0) {
678                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
679                                     ret);
680                         goto unlock;
681                 }
682         }
683
684         rte_wmb();
685         avp->flags &= ~AVP_F_DETACHED;
686
687         ret = 0;
688
689 unlock:
690         rte_spinlock_unlock(&avp->lock);
691         return ret;
692 }
693
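/*
 * Interrupt service routine for migration notifications.  The interrupt
 * status register is clear-on-read, so every raised bit must be handled (or
 * remembered) here.  For a migration interrupt the handler reads the
 * migration status register to learn whether the device is being detached
 * from or attached to a host, performs the corresponding operation, and
 * acknowledges by writing either the observed state or
 * RTE_AVP_MIGRATION_ERROR back to the ack register.
 */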
694 static void
695 avp_dev_interrupt_handler(struct rte_intr_handle *intr_handle,
696                                                   void *data)
697 {
698         struct rte_eth_dev *eth_dev = data;
699         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
700         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
701         uint32_t status, value;
702         int ret;
703
704         if (registers == NULL)
705                 rte_panic("no mapped MMIO register space\n");
706
707         /* read the interrupt status register
708          * note: this register clears on read so all raised interrupts must be
709          *    handled or remembered for later processing
710          */
711         status = AVP_READ32(
712                 RTE_PTR_ADD(registers,
713                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
714
715         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
716                 /* handle interrupt based on current status */
717                 value = AVP_READ32(
718                         RTE_PTR_ADD(registers,
719                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
720                 switch (value) {
721                 case RTE_AVP_MIGRATION_DETACHED:
722                         ret = avp_dev_detach(eth_dev);
723                         break;
724                 case RTE_AVP_MIGRATION_ATTACHED:
725                         ret = avp_dev_attach(eth_dev);
726                         break;
727                 default:
728                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
729                                     value);
730                         ret = -EINVAL;
731                 }
732
733                 /* acknowledge the request by writing out our current status */
734                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
735                 AVP_WRITE32(value,
736                             RTE_PTR_ADD(registers,
737                                         RTE_AVP_MIGRATION_ACK_OFFSET));
738
739                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
740         }
741
742         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
743                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
744                             status);
745
746         /* re-enable UIO interrupt handling */
747         ret = rte_intr_enable(intr_handle);
748         if (ret < 0) {
749                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
750                             ret);
751                 /* continue */
752         }
753 }
754
755 static int
756 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
757 {
758         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
759         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
760         int ret;
761
762         if (registers == NULL)
763                 return -EINVAL;
764
765         /* enable UIO interrupt handling */
766         ret = rte_intr_enable(&pci_dev->intr_handle);
767         if (ret < 0) {
768                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
769                             ret);
770                 return ret;
771         }
772
773         /* inform the device that all interrupts are enabled */
774         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
775                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
776
777         return 0;
778 }
779
780 static int
781 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
782 {
783         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
784         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
785         int ret;
786
787         if (registers == NULL)
788                 return 0;
789
790         /* inform the device that all interrupts are disabled */
791         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
792                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
793
794         /* disable UIO interrupt handling */
795         ret = rte_intr_disable(&pci_dev->intr_handle);
796         if (ret < 0) {
797                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
798                             ret);
799                 return ret;
800         }
801
802         return 0;
803 }
804
805 static int
806 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
807 {
808         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
809         int ret;
810
811         /* register a callback handler with UIO for interrupt notifications */
812         ret = rte_intr_callback_register(&pci_dev->intr_handle,
813                                          avp_dev_interrupt_handler,
814                                          (void *)eth_dev);
815         if (ret < 0) {
816                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
817                             ret);
818                 return ret;
819         }
820
821         /* enable interrupt processing */
822         return avp_dev_enable_interrupts(eth_dev);
823 }
824
825 static int
826 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
827 {
828         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
829         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
830         uint32_t value;
831
832         if (registers == NULL)
833                 return 0;
834
835         value = AVP_READ32(RTE_PTR_ADD(registers,
836                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
837         if (value == RTE_AVP_MIGRATION_DETACHED) {
838                 /* migration is in progress; ack it if we have not already */
839                 AVP_WRITE32(value,
840                             RTE_PTR_ADD(registers,
841                                         RTE_AVP_MIGRATION_ACK_OFFSET));
842                 return 1;
843         }
844         return 0;
845 }
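
/*
 * Note: this is consulted from eth_avp_dev_init(); if the host has already
 * signalled a detach before the PMD initializes, probing is aborted with
 * -EBUSY rather than attaching to a device that is mid-migration.
 */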
846
847 /*
848  * create an AVP device using the supplied device info by first translating it
849  * to guest address space(s).
850  */
851 static int
852 avp_dev_create(struct rte_pci_device *pci_dev,
853                struct rte_eth_dev *eth_dev)
854 {
855         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
856         struct rte_avp_device_info *host_info;
857         struct rte_mem_resource *resource;
858         unsigned int i;
859
860         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
861         if (resource->addr == NULL) {
862                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
863                             RTE_AVP_PCI_DEVICE_BAR);
864                 return -EFAULT;
865         }
866         host_info = (struct rte_avp_device_info *)resource->addr;
867
868         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
869                 avp_dev_version_check(host_info->version)) {
870                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
871                             host_info->magic, host_info->version,
872                             AVP_DPDK_DRIVER_VERSION);
873                 return -EINVAL;
874         }
875
876         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
877                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
878                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
879                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
880
881         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
882                     host_info->min_tx_queues, host_info->max_tx_queues);
883         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
884                     host_info->min_rx_queues, host_info->max_rx_queues);
885         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
886                     host_info->features);
887
888         if (avp->magic != AVP_ETHDEV_MAGIC) {
889                 /*
890                  * First time initialization (i.e., not during a VM
891                  * migration)
892                  */
893                 memset(avp, 0, sizeof(*avp));
894                 avp->magic = AVP_ETHDEV_MAGIC;
895                 avp->dev_data = eth_dev->data;
896                 avp->port_id = eth_dev->data->port_id;
897                 avp->host_mbuf_size = host_info->mbuf_size;
898                 avp->host_features = host_info->features;
899                 rte_spinlock_init(&avp->lock);
900                 memcpy(&avp->ethaddr.addr_bytes[0],
901                        host_info->ethaddr, ETHER_ADDR_LEN);
902                 /* adjust max values to not exceed our max */
903                 avp->max_tx_queues =
904                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
905                 avp->max_rx_queues =
906                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
907         } else {
908                 /* Re-attaching during migration */
909
910                 /* TODO... requires validation of host values */
911                 if ((host_info->features & avp->features) != avp->features) {
912                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
913                                     avp->features, host_info->features);
914                         /* this should not be possible; continue for now */
915                 }
916         }
917
918         /* the device id is allowed to change over migrations */
919         avp->device_id = host_info->device_id;
920
921         /* translate incoming host addresses to guest address space */
922         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
923                     host_info->tx_phys);
924         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
925                     host_info->alloc_phys);
926         for (i = 0; i < avp->max_tx_queues; i++) {
927                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
928                         host_info->tx_phys + (i * host_info->tx_size));
929
930                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
931                         host_info->alloc_phys + (i * host_info->alloc_size));
932         }
933
934         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
935                     host_info->rx_phys);
936         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
937                     host_info->free_phys);
938         for (i = 0; i < avp->max_rx_queues; i++) {
939                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
940                         host_info->rx_phys + (i * host_info->rx_size));
941                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
942                         host_info->free_phys + (i * host_info->free_size));
943         }
944
945         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
946                     host_info->req_phys);
947         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
948                     host_info->resp_phys);
949         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
950                     host_info->sync_phys);
951         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
952                     host_info->mbuf_phys);
953         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
954         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
955         avp->sync_addr =
956                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
957         avp->mbuf_addr =
958                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
959
960         /*
961          * store the host mbuf virtual address so that we can calculate
962          * relative offsets for each mbuf as they are processed
963          */
964         avp->host_mbuf_addr = host_info->mbuf_va;
965         avp->host_sync_addr = host_info->sync_va;
966
967         /*
968          * store the maximum packet length that is supported by the host.
969          */
970         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
971         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
972                                 host_info->max_rx_pkt_len);
973
974         return 0;
975 }
976
977 /*
978  * This function is based on the probe() function in avp_pci.c.
979  * It returns 0 on success.
980  */
981 static int
982 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
983 {
984         struct avp_dev *avp =
985                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
986         struct rte_pci_device *pci_dev;
987         int ret;
988
989         pci_dev = AVP_DEV_TO_PCI(eth_dev);
990         eth_dev->dev_ops = &avp_eth_dev_ops;
991         eth_dev->rx_pkt_burst = &avp_recv_pkts;
992         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
993
994         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
995                 /*
996                  * no setup required on secondary processes.  All data is saved
997                  * in dev_private by the primary process. All resources should
998                  * be mapped to the same virtual address so all pointers should
999                  * be valid.
1000                  */
1001                 if (eth_dev->data->scattered_rx) {
1002                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1003                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1004                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1005                 }
1006                 return 0;
1007         }
1008
1009         rte_eth_copy_pci_info(eth_dev, pci_dev);
1010
1011         eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1012
1013         /* Check current migration status */
1014         if (avp_dev_migration_pending(eth_dev)) {
1015                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1016                 return -EBUSY;
1017         }
1018
1019         /* Check BAR resources */
1020         ret = avp_dev_check_regions(eth_dev);
1021         if (ret < 0) {
1022                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1023                             ret);
1024                 return ret;
1025         }
1026
1027         /* Enable interrupts */
1028         ret = avp_dev_setup_interrupts(eth_dev);
1029         if (ret < 0) {
1030                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1031                 return ret;
1032         }
1033
1034         /* Handle each subtype */
1035         ret = avp_dev_create(pci_dev, eth_dev);
1036         if (ret < 0) {
1037                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1038                 return ret;
1039         }
1040
1041         /* Allocate memory for storing MAC addresses */
1042         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1043         if (eth_dev->data->mac_addrs == NULL) {
1044                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1045                             ETHER_ADDR_LEN);
1046                 return -ENOMEM;
1047         }
1048
1049         /* Get a mac from device config */
1050         ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1051
1052         return 0;
1053 }
1054
1055 static int
1056 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1057 {
1058         int ret;
1059
1060         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1061                 return -EPERM;
1062
1063         if (eth_dev->data == NULL)
1064                 return 0;
1065
1066         ret = avp_dev_disable_interrupts(eth_dev);
1067         if (ret != 0) {
1068                 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1069                 return ret;
1070         }
1071
1072         if (eth_dev->data->mac_addrs != NULL) {
1073                 rte_free(eth_dev->data->mac_addrs);
1074                 eth_dev->data->mac_addrs = NULL;
1075         }
1076
1077         return 0;
1078 }
1079
1080
1081 static struct eth_driver rte_avp_pmd = {
1082         {
1083                 .id_table = pci_id_avp_map,
1084                 .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1085                 .probe = rte_eth_dev_pci_probe,
1086                 .remove = rte_eth_dev_pci_remove,
1087         },
1088         .eth_dev_init = eth_avp_dev_init,
1089         .eth_dev_uninit = eth_avp_dev_uninit,
1090         .dev_private_size = sizeof(struct avp_adapter),
1091 };
1092
1093 static int
1094 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1095                          struct avp_dev *avp)
1096 {
1097         unsigned int max_rx_pkt_len;
1098
1099         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1100
1101         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1102             (max_rx_pkt_len > avp->host_mbuf_size)) {
1103                 /*
1104                  * If the guest MTU is greater than either the host or guest
1105                  * buffers then chained mbufs have to be enabled in the TX
1106                  * direction.  It is assumed that the application will not need
1107                  * to send packets larger than their max_rx_pkt_len (MRU).
1108                  */
1109                 return 1;
1110         }
1111
1112         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1113             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1114                 /*
1115                  * If the host MRU is greater than its own mbuf size or the
1116                  * guest mbuf size then chained mbufs have to be enabled in the
1117                  * RX direction.
1118                  */
1119                 return 1;
1120         }
1121
1122         return 0;
1123 }
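
/*
 * Example (illustrative sizes): with 2048-byte guest and host mbufs and a
 * configured max_rx_pkt_len of 9000 (jumbo frames), the first test above
 * fires and the scattered receive/transmit handlers are selected.
 */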
1124
1125 static int
1126 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1127                        uint16_t rx_queue_id,
1128                        uint16_t nb_rx_desc,
1129                        unsigned int socket_id,
1130                        const struct rte_eth_rxconf *rx_conf,
1131                        struct rte_mempool *pool)
1132 {
1133         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1134         struct rte_pktmbuf_pool_private *mbp_priv;
1135         struct avp_queue *rxq;
1136
1137         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1138                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1139                             rx_queue_id, eth_dev->data->nb_rx_queues);
1140                 return -EINVAL;
1141         }
1142
1143         /* Save mbuf pool pointer */
1144         avp->pool = pool;
1145
1146         /* Save the local mbuf size */
1147         mbp_priv = rte_mempool_get_priv(pool);
1148         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1149         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1150
1151         if (avp_dev_enable_scattered(eth_dev, avp)) {
1152                 if (!eth_dev->data->scattered_rx) {
1153                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1154                         eth_dev->data->scattered_rx = 1;
1155                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1156                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1157                 }
1158         }
1159
1160         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1161                     avp->max_rx_pkt_len,
1162                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1163                     avp->host_mbuf_size,
1164                     avp->guest_mbuf_size);
1165
1166         /* allocate a queue object */
1167         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1168                                  RTE_CACHE_LINE_SIZE, socket_id);
1169         if (rxq == NULL) {
1170                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1171                 return -ENOMEM;
1172         }
1173
1174         /* save back pointers to AVP and Ethernet devices */
1175         rxq->avp = avp;
1176         rxq->dev_data = eth_dev->data;
1177         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1178
1179         /* setup the queue receive mapping for the current queue. */
1180         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1181
1182         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1183
1184         (void)nb_rx_desc;
1185         (void)rx_conf;
1186         return 0;
1187 }
1188
1189 static int
1190 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1191                        uint16_t tx_queue_id,
1192                        uint16_t nb_tx_desc,
1193                        unsigned int socket_id,
1194                        const struct rte_eth_txconf *tx_conf)
1195 {
1196         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1197         struct avp_queue *txq;
1198
1199         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1200                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1201                             tx_queue_id, eth_dev->data->nb_tx_queues);
1202                 return -EINVAL;
1203         }
1204
1205         /* allocate a queue object */
1206         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1207                                  RTE_CACHE_LINE_SIZE, socket_id);
1208         if (txq == NULL) {
1209                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1210                 return -ENOMEM;
1211         }
1212
1213         /* only the configured set of transmit queues are used */
1214         txq->queue_id = tx_queue_id;
1215         txq->queue_base = tx_queue_id;
1216         txq->queue_limit = tx_queue_id;
1217
1218         /* save back pointers to AVP and Ethernet devices */
1219         txq->avp = avp;
1220         txq->dev_data = eth_dev->data;
1221         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1222
1223         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1224
1225         (void)nb_tx_desc;
1226         (void)tx_conf;
1227         return 0;
1228 }
1229
1230 static inline int
1231 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1232 {
1233         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1234         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1235         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1236 }
1237
1238 static inline int
1239 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1240 {
1241         struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1242
1243         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1244                 /* allow all packets destined to our address */
1245                 return 0;
1246         }
1247
1248         if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1249                 /* allow all broadcast packets */
1250                 return 0;
1251         }
1252
1253         if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1254                 /* allow all multicast packets */
1255                 return 0;
1256         }
1257
1258         if (avp->flags & AVP_F_PROMISC) {
1259                 /* allow all packets when in promiscuous mode */
1260                 return 0;
1261         }
1262
1263         return -1;
1264 }
1265
1266 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1267 static inline void
1268 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1269 {
1270         struct rte_avp_desc *first_buf;
1271         struct rte_avp_desc *pkt_buf;
1272         unsigned int pkt_len;
1273         unsigned int nb_segs;
1274         void *pkt_data;
1275         unsigned int i;
1276
1277         first_buf = avp_dev_translate_buffer(avp, buf);
1278
1279         i = 0;
1280         pkt_len = 0;
1281         nb_segs = first_buf->nb_segs;
1282         do {
1283                 /* Adjust pointers for guest addressing */
1284                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1285                 if (pkt_buf == NULL)
1286                         rte_panic("bad buffer: segment %u has an invalid address %p\n",
1287                                   i, buf);
1288                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1289                 if (pkt_data == NULL)
1290                         rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1291                                   i);
1292                 if (pkt_buf->data_len == 0)
1293                         rte_panic("bad buffer: segment %u has 0 data length\n",
1294                                   i);
1295                 pkt_len += pkt_buf->data_len;
1296                 nb_segs--;
1297                 i++;
1298
1299         } while (nb_segs && (buf = pkt_buf->next) != NULL);
1300
1301         if (nb_segs != 0)
1302                 rte_panic("bad buffer: expected %u segments found %u\n",
1303                           first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1304         if (pkt_len != first_buf->pkt_len)
1305                 rte_panic("bad buffer: expected length %u found %u\n",
1306                           first_buf->pkt_len, pkt_len);
1307 }
1308
1309 #define avp_dev_buffer_sanity_check(a, b) \
1310         __avp_dev_buffer_sanity_check((a), (b))
1311
1312 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1313
1314 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1315
1316 #endif
1317
1318 /*
1319  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1320  * there are exactly enough mbufs available to copy all source bytes.
1321  */
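/*
 * Example (illustrative sizes): a 3000-byte host packet carried in two host
 * buffers is copied into two 2048-byte guest mbufs; the first mbuf is filled
 * to 2048 bytes, the second receives the remaining 952 bytes, and pkt_len,
 * nb_segs and the VLAN metadata are set on the first mbuf.
 */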
1322 static inline struct rte_mbuf *
1323 avp_dev_copy_from_buffers(struct avp_dev *avp,
1324                           struct rte_avp_desc *buf,
1325                           struct rte_mbuf **mbufs,
1326                           unsigned int count)
1327 {
1328         struct rte_mbuf *m_previous = NULL;
1329         struct rte_avp_desc *pkt_buf;
1330         unsigned int total_length = 0;
1331         unsigned int copy_length;
1332         unsigned int src_offset;
1333         struct rte_mbuf *m;
1334         uint16_t ol_flags;
1335         uint16_t vlan_tci;
1336         void *pkt_data;
1337         unsigned int i;
1338
1339         avp_dev_buffer_sanity_check(avp, buf);
1340
1341         /* setup the first source buffer */
1342         pkt_buf = avp_dev_translate_buffer(avp, buf);
1343         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1344         total_length = pkt_buf->pkt_len;
1345         src_offset = 0;
1346
1347         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1348                 ol_flags = PKT_RX_VLAN_PKT;
1349                 vlan_tci = pkt_buf->vlan_tci;
1350         } else {
1351                 ol_flags = 0;
1352                 vlan_tci = 0;
1353         }
1354
1355         for (i = 0; (i < count) && (buf != NULL); i++) {
1356                 /* fill each destination buffer */
1357                 m = mbufs[i];
1358
1359                 if (m_previous != NULL)
1360                         m_previous->next = m;
1361
1362                 m_previous = m;
1363
1364                 do {
1365                         /*
1366                          * Copy as many source buffers as will fit in the
1367                          * destination buffer.
1368                          */
1369                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1370                                                rte_pktmbuf_data_len(m)),
1371                                               (pkt_buf->data_len -
1372                                                src_offset));
1373                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1374                                                rte_pktmbuf_data_len(m)),
1375                                    RTE_PTR_ADD(pkt_data, src_offset),
1376                                    copy_length);
1377                         rte_pktmbuf_data_len(m) += copy_length;
1378                         src_offset += copy_length;
1379
1380                         if (likely(src_offset == pkt_buf->data_len)) {
1381                                 /* need a new source buffer */
1382                                 buf = pkt_buf->next;
1383                                 if (buf != NULL) {
1384                                         pkt_buf = avp_dev_translate_buffer(
1385                                                 avp, buf);
1386                                         pkt_data = avp_dev_translate_buffer(
1387                                                 avp, pkt_buf->data);
1388                                         src_offset = 0;
1389                                 }
1390                         }
1391
1392                         if (unlikely(rte_pktmbuf_data_len(m) ==
1393                                      avp->guest_mbuf_size)) {
1394                                 /* need a new destination mbuf */
1395                                 break;
1396                         }
1397
1398                 } while (buf != NULL);
1399         }
1400
1401         m = mbufs[0];
1402         m->ol_flags = ol_flags;
1403         m->nb_segs = count;
1404         rte_pktmbuf_pkt_len(m) = total_length;
1405         m->vlan_tci = vlan_tci;
1406
1407         __rte_mbuf_sanity_check(m, 1);
1408
1409         return m;
1410 }
1411
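/*
 * Burst receive handler used when packets may span several host buffers.
 * Each packet is copied into a chain of freshly allocated guest mbufs and
 * the consumed host descriptors are returned to the free queue.  Returns 0
 * while the device is detached for a VM live migration.
 */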
1412 static uint16_t
1413 avp_recv_scattered_pkts(void *rx_queue,
1414                         struct rte_mbuf **rx_pkts,
1415                         uint16_t nb_pkts)
1416 {
1417         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1418         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1419         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1420         struct avp_dev *avp = rxq->avp;
1421         struct rte_avp_desc *pkt_buf;
1422         struct rte_avp_fifo *free_q;
1423         struct rte_avp_fifo *rx_q;
1424         struct rte_avp_desc *buf;
1425         unsigned int count, avail, n;
1426         unsigned int guest_mbuf_size;
1427         struct rte_mbuf *m;
1428         unsigned int required;
1429         unsigned int buf_len;
1430         unsigned int port_id;
1431         unsigned int i;
1432
1433         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1434                 /* VM live migration in progress */
1435                 return 0;
1436         }
1437
1438         guest_mbuf_size = avp->guest_mbuf_size;
1439         port_id = avp->port_id;
1440         rx_q = avp->rx_q[rxq->queue_id];
1441         free_q = avp->free_q[rxq->queue_id];
1442
1443         /* setup next queue to service */
1444         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1445                 (rxq->queue_id + 1) : rxq->queue_base;
1446
1447         /* determine how many slots are available in the free queue */
1448         count = avp_fifo_free_count(free_q);
1449
1450         /* determine how many packets are available in the rx queue */
1451         avail = avp_fifo_count(rx_q);
1452
1453         /* determine how many packets can be received */
1454         count = RTE_MIN(count, avail);
1455         count = RTE_MIN(count, nb_pkts);
1456         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1457
1458         if (unlikely(count == 0)) {
1459                 /* no free buffers, or no buffers on the rx queue */
1460                 return 0;
1461         }
1462
1463         /* retrieve pending packets */
1464         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1465         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1466                    count, rx_q);
1467
1468         count = 0;
1469         for (i = 0; i < n; i++) {
1470                 /* prefetch next entry while processing current one */
1471                 if (i + 1 < n) {
1472                         pkt_buf = avp_dev_translate_buffer(avp,
1473                                                            avp_bufs[i + 1]);
1474                         rte_prefetch0(pkt_buf);
1475                 }
1476                 buf = avp_bufs[i];
1477
1478                 /* Peek into the first buffer to determine the total length */
1479                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1480                 buf_len = pkt_buf->pkt_len;
1481
1482                 /* Allocate enough mbufs to receive the entire packet */
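                /* ceiling division, e.g. with 2048 byte guest mbufs a 3000 byte packet needs 2 */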
1483                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1484                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1485                         rxq->dev_data->rx_mbuf_alloc_failed++;
1486                         continue;
1487                 }
1488
1489                 /* Copy the data from the buffers to our mbufs */
1490                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1491
1492                 /* finalize mbuf */
1493                 m->port = port_id;
1494
1495                 if (_avp_mac_filter(avp, m) != 0) {
1496                         /* silently discard packets not destined to our MAC */
1497                         rte_pktmbuf_free(m);
1498                         continue;
1499                 }
1500
1501                 /* return new mbuf to caller */
1502                 rx_pkts[count++] = m;
1503                 rxq->bytes += buf_len;
1504         }
1505
1506         rxq->packets += count;
1507
1508         /* return the buffers to the free queue */
1509         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1510
1511         return count;
1512 }
1513
1514
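/*
 * Burst receive handler for the common case where each packet fits in a
 * single guest mbuf.  Packets that are larger, or chained on the host side,
 * are dropped and counted as errors; the scattered handler should be used
 * for those.
 */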
1515 static uint16_t
1516 avp_recv_pkts(void *rx_queue,
1517               struct rte_mbuf **rx_pkts,
1518               uint16_t nb_pkts)
1519 {
1520         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1521         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1522         struct avp_dev *avp = rxq->avp;
1523         struct rte_avp_desc *pkt_buf;
1524         struct rte_avp_fifo *free_q;
1525         struct rte_avp_fifo *rx_q;
1526         unsigned int count, avail, n;
1527         unsigned int pkt_len;
1528         struct rte_mbuf *m;
1529         char *pkt_data;
1530         unsigned int i;
1531
1532         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1533                 /* VM live migration in progress */
1534                 return 0;
1535         }
1536
1537         rx_q = avp->rx_q[rxq->queue_id];
1538         free_q = avp->free_q[rxq->queue_id];
1539
1540         /* setup next queue to service */
1541         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1542                 (rxq->queue_id + 1) : rxq->queue_base;
1543
1544         /* determine how many slots are available in the free queue */
1545         count = avp_fifo_free_count(free_q);
1546
1547         /* determine how many packets are available in the rx queue */
1548         avail = avp_fifo_count(rx_q);
1549
1550         /* determine how many packets can be received */
1551         count = RTE_MIN(count, avail);
1552         count = RTE_MIN(count, nb_pkts);
1553         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1554
1555         if (unlikely(count == 0)) {
1556                 /* no free buffers, or no buffers on the rx queue */
1557                 return 0;
1558         }
1559
1560         /* retrieve pending packets */
1561         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1562         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1563                    count, rx_q);
1564
1565         count = 0;
1566         for (i = 0; i < n; i++) {
1567                 /* prefetch next entry while processing current one */
1568                 if (i < n - 1) {
1569                         pkt_buf = avp_dev_translate_buffer(avp,
1570                                                            avp_bufs[i + 1]);
1571                         rte_prefetch0(pkt_buf);
1572                 }
1573
1574                 /* Adjust host pointers for guest addressing */
1575                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1576                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1577                 pkt_len = pkt_buf->pkt_len;
1578
1579                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1580                              (pkt_buf->nb_segs > 1))) {
1581                         /*
1582                          * application should be using the scattered receive
1583                          * function
1584                          */
1585                         rxq->errors++;
1586                         continue;
1587                 }
1588
1589         /* allocate a new mbuf to hold the received data */
1590                 m = rte_pktmbuf_alloc(avp->pool);
1591                 if (unlikely(m == NULL)) {
1592                         rxq->dev_data->rx_mbuf_alloc_failed++;
1593                         continue;
1594                 }
1595
1596                 /* copy data out of the host buffer to our buffer */
1597                 m->data_off = RTE_PKTMBUF_HEADROOM;
1598                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1599
1600                 /* initialize the local mbuf */
1601                 rte_pktmbuf_data_len(m) = pkt_len;
1602                 rte_pktmbuf_pkt_len(m) = pkt_len;
1603                 m->port = avp->port_id;
1604
1605                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1606                         m->ol_flags = PKT_RX_VLAN_PKT;
1607                         m->vlan_tci = pkt_buf->vlan_tci;
1608                 }
1609
1610                 if (_avp_mac_filter(avp, m) != 0) {
1611                         /* silently discard packets not destined to our MAC */
1612                         rte_pktmbuf_free(m);
1613                         continue;
1614                 }
1615
1616                 /* return new mbuf to caller */
1617                 rx_pkts[count++] = m;
1618                 rxq->bytes += pkt_len;
1619         }
1620
1621         rxq->packets += count;
1622
1623         /* return the buffers to the free queue */
1624         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1625
1626         return count;
1627 }
1628
1629 /*
1630  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1631  * there are sufficient destination buffers to contain the entire source
1632  * packet.
1633  */
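/*
 * The host descriptors are chained through their next pointers; the first
 * descriptor carries the total packet length, segment count and any VLAN
 * metadata for the whole chain.
 */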
1634 static inline uint16_t
1635 avp_dev_copy_to_buffers(struct avp_dev *avp,
1636                         struct rte_mbuf *mbuf,
1637                         struct rte_avp_desc **buffers,
1638                         unsigned int count)
1639 {
1640         struct rte_avp_desc *previous_buf = NULL;
1641         struct rte_avp_desc *first_buf = NULL;
1642         struct rte_avp_desc *pkt_buf;
1643         struct rte_avp_desc *buf;
1644         size_t total_length;
1645         struct rte_mbuf *m;
1646         size_t copy_length;
1647         size_t src_offset;
1648         char *pkt_data;
1649         unsigned int i;
1650
1651         __rte_mbuf_sanity_check(mbuf, 1);
1652
1653         m = mbuf;
1654         src_offset = 0;
1655         total_length = rte_pktmbuf_pkt_len(m);
1656         for (i = 0; (i < count) && (m != NULL); i++) {
1657                 /* fill each destination buffer */
1658                 buf = buffers[i];
1659
1660                 if (i < count - 1) {
1661                         /* prefetch next entry while processing this one */
1662                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1663                         rte_prefetch0(pkt_buf);
1664                 }
1665
1666                 /* Adjust pointers for guest addressing */
1667                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1668                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1669
1670                 /* setup the buffer chain */
1671                 if (previous_buf != NULL)
1672                         previous_buf->next = buf;
1673                 else
1674                         first_buf = pkt_buf;
1675
1676                 previous_buf = pkt_buf;
1677
1678                 do {
1679                         /*
1680                          * copy as many source mbuf segments as will fit in the
1681                          * destination buffer.
1682                          */
1683                         copy_length = RTE_MIN((avp->host_mbuf_size -
1684                                                pkt_buf->data_len),
1685                                               (rte_pktmbuf_data_len(m) -
1686                                                src_offset));
1687                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1688                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1689                                                src_offset),
1690                                    copy_length);
1691                         pkt_buf->data_len += copy_length;
1692                         src_offset += copy_length;
1693
1694                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1695                                 /* need a new source buffer */
1696                                 m = m->next;
1697                                 src_offset = 0;
1698                         }
1699
1700                         if (unlikely(pkt_buf->data_len ==
1701                                      avp->host_mbuf_size)) {
1702                                 /* need a new destination buffer */
1703                                 break;
1704                         }
1705
1706                 } while (m != NULL);
1707         }
1708
1709         first_buf->nb_segs = count;
1710         first_buf->pkt_len = total_length;
1711
1712         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1713                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1714                 first_buf->vlan_tci = mbuf->vlan_tci;
1715         }
1716
1717         avp_dev_buffer_sanity_check(avp, buffers[0]);
1718
1719         return total_length;
1720 }
1721
1722
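/*
 * Burst transmit handler for packets that may require several host buffers.
 * Buffers are taken from the alloc queue, filled from each mbuf chain, and
 * the head descriptor of every packet is pushed onto the transmit queue;
 * the original mbufs are freed once their data has been copied.
 */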
1723 static uint16_t
1724 avp_xmit_scattered_pkts(void *tx_queue,
1725                         struct rte_mbuf **tx_pkts,
1726                         uint16_t nb_pkts)
1727 {
1728         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1729                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1730         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1731         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1732         struct avp_dev *avp = txq->avp;
1733         struct rte_avp_fifo *alloc_q;
1734         struct rte_avp_fifo *tx_q;
1735         unsigned int count, avail, n;
1736         unsigned int orig_nb_pkts;
1737         struct rte_mbuf *m;
1738         unsigned int required;
1739         unsigned int segments;
1740         unsigned int tx_bytes;
1741         unsigned int i;
1742
1743         orig_nb_pkts = nb_pkts;
1744         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1745                 /* VM live migration in progress */
1746                 /* TODO ... buffer for X packets then drop? */
1747                 txq->errors += nb_pkts;
1748                 return 0;
1749         }
1750
1751         tx_q = avp->tx_q[txq->queue_id];
1752         alloc_q = avp->alloc_q[txq->queue_id];
1753
1754         /* limit the number of transmitted packets to the max burst size */
1755         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1756                 nb_pkts = AVP_MAX_TX_BURST;
1757
1758         /* determine how many buffers are available to copy into */
1759         avail = avp_fifo_count(alloc_q);
1760         if (unlikely(avail > (AVP_MAX_TX_BURST *
1761                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1762                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1763
1764         /* determine how many slots are available in the transmit queue */
1765         count = avp_fifo_free_count(tx_q);
1766
1767         /* determine how many packets can be sent */
1768         nb_pkts = RTE_MIN(count, nb_pkts);
1769
1770         /* determine how many packets will fit in the available buffers */
1771         count = 0;
1772         segments = 0;
1773         for (i = 0; i < nb_pkts; i++) {
1774                 m = tx_pkts[i];
1775                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1776                         /* prefetch next entry while processing this one */
1777                         rte_prefetch0(tx_pkts[i + 1]);
1778                 }
1779                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1780                         avp->host_mbuf_size;
1781
1782                 if (unlikely((required == 0) ||
1783                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1784                         break;
1785                 else if (unlikely(required + segments > avail))
1786                         break;
1787                 segments += required;
1788                 count++;
1789         }
1790         nb_pkts = count;
1791
1792         if (unlikely(nb_pkts == 0)) {
1793                 /* no available buffers, or no space on the tx queue */
1794                 txq->errors += orig_nb_pkts;
1795                 return 0;
1796         }
1797
1798         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1799                    nb_pkts, tx_q);
1800
1801         /* retrieve sufficient send buffers */
1802         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1803         if (unlikely(n != segments)) {
1804                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1805                            "n=%u, segments=%u, orig=%u\n",
1806                            n, segments, orig_nb_pkts);
1807                 txq->errors += orig_nb_pkts;
1808                 return 0;
1809         }
1810
1811         tx_bytes = 0;
1812         count = 0;
1813         for (i = 0; i < nb_pkts; i++) {
1814                 /* process each packet to be transmitted */
1815                 m = tx_pkts[i];
1816
1817                 /* determine how many buffers are required for this packet */
1818                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1819                         avp->host_mbuf_size;
1820
1821                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1822                                                     &avp_bufs[count], required);
1823                 tx_bufs[i] = avp_bufs[count];
1824                 count += required;
1825
1826                 /* free the original mbuf */
1827                 rte_pktmbuf_free(m);
1828         }
1829
1830         txq->packets += nb_pkts;
1831         txq->bytes += tx_bytes;
1832
1833 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1834         for (i = 0; i < nb_pkts; i++)
1835                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1836 #endif
1837
1838         /* send the packets */
1839         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1840         if (unlikely(n != orig_nb_pkts))
1841                 txq->errors += (orig_nb_pkts - n);
1842
1843         return n;
1844 }
1845
1846
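/*
 * Burst transmit handler for the common case where each packet fits in a
 * single host buffer.  Oversized packets are truncated and counted as
 * errors rather than split across buffers.
 */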
1847 static uint16_t
1848 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1849 {
1850         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1851         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1852         struct avp_dev *avp = txq->avp;
1853         struct rte_avp_desc *pkt_buf;
1854         struct rte_avp_fifo *alloc_q;
1855         struct rte_avp_fifo *tx_q;
1856         unsigned int count, avail, n;
1857         struct rte_mbuf *m;
1858         unsigned int pkt_len;
1859         unsigned int tx_bytes;
1860         char *pkt_data;
1861         unsigned int i;
1862
1863         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1864                 /* VM live migration in progress */
1865                 /* TODO ... buffer for X packets then drop?! */
1866                 txq->errors++;
1867                 return 0;
1868         }
1869
1870         tx_q = avp->tx_q[txq->queue_id];
1871         alloc_q = avp->alloc_q[txq->queue_id];
1872
1873         /* limit the number of transmitted packets to the max burst size */
1874         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1875                 nb_pkts = AVP_MAX_TX_BURST;
1876
1877         /* determine how many buffers are available to copy into */
1878         avail = avp_fifo_count(alloc_q);
1879
1880         /* determine how many slots are available in the transmit queue */
1881         count = avp_fifo_free_count(tx_q);
1882
1883         /* determine how many packets can be sent */
1884         count = RTE_MIN(count, avail);
1885         count = RTE_MIN(count, nb_pkts);
1886
1887         if (unlikely(count == 0)) {
1888                 /* no available buffers, or no space on the tx queue */
1889                 txq->errors += nb_pkts;
1890                 return 0;
1891         }
1892
1893         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1894                    count, tx_q);
1895
1896         /* retrieve sufficient send buffers */
1897         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1898         if (unlikely(n != count)) {
1899                 txq->errors++;
1900                 return 0;
1901         }
1902
1903         tx_bytes = 0;
1904         for (i = 0; i < count; i++) {
1905                 /* prefetch next entry while processing the current one */
1906                 if (i < count - 1) {
1907                         pkt_buf = avp_dev_translate_buffer(avp,
1908                                                            avp_bufs[i + 1]);
1909                         rte_prefetch0(pkt_buf);
1910                 }
1911
1912                 /* process each packet to be transmitted */
1913                 m = tx_pkts[i];
1914
1915                 /* Adjust pointers for guest addressing */
1916                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1917                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1918                 pkt_len = rte_pktmbuf_pkt_len(m);
1919
1920                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1921                                          (pkt_len > avp->host_mbuf_size))) {
1922                         /*
1923                          * application should be using the scattered transmit
1924                          * function; send it truncated to avoid the performance
1925                          * hit of having to manage returning the already
1926                          * allocated buffer to the free list.  This should not
1927                          * happen since the application should have set the
1928                          * max_rx_pkt_len based on its MTU and it should be
1929                          * policing its own packet sizes.
1930                          */
1931                         txq->errors++;
1932                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1933                                           avp->host_mbuf_size);
1934                 }
1935
1936                 /* copy data out of our mbuf and into the AVP buffer */
1937                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1938                 pkt_buf->pkt_len = pkt_len;
1939                 pkt_buf->data_len = pkt_len;
1940                 pkt_buf->nb_segs = 1;
1941                 pkt_buf->next = NULL;
1942
1943                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1944                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1945                         pkt_buf->vlan_tci = m->vlan_tci;
1946                 }
1947
1948                 tx_bytes += pkt_len;
1949
1950                 /* free the original mbuf */
1951                 rte_pktmbuf_free(m);
1952         }
1953
1954         txq->packets += count;
1955         txq->bytes += tx_bytes;
1956
1957         /* send the packets */
1958         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1959
1960         return n;
1961 }
1962
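/* Remove all references to the Rx queue from the shared device data. */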
1963 static void
1964 avp_dev_rx_queue_release(void *rx_queue)
1965 {
1966         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1967         struct avp_dev *avp = rxq->avp;
1968         struct rte_eth_dev_data *data = avp->dev_data;
1969         unsigned int i;
1970
1971         for (i = 0; i < avp->num_rx_queues; i++) {
1972                 if (data->rx_queues[i] == rxq)
1973                         data->rx_queues[i] = NULL;
1974         }
1975 }
1976
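/* Remove all references to the Tx queue from the shared device data. */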
1977 static void
1978 avp_dev_tx_queue_release(void *tx_queue)
1979 {
1980         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1981         struct avp_dev *avp = txq->avp;
1982         struct rte_eth_dev_data *data = avp->dev_data;
1983         unsigned int i;
1984
1985         for (i = 0; i < avp->num_tx_queues; i++) {
1986                 if (data->tx_queues[i] == txq)
1987                         data->tx_queues[i] = NULL;
1988         }
1989 }
1990
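/*
 * Negotiate the device configuration with the host: set up the queue counts,
 * apply the requested VLAN offloads and send the resulting configuration
 * over the control channel.  Rejected while the device is detached for a VM
 * live migration.
 */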
1991 static int
1992 avp_dev_configure(struct rte_eth_dev *eth_dev)
1993 {
1994         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
1995         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1996         struct rte_avp_device_info *host_info;
1997         struct rte_avp_device_config config;
1998         int mask = 0;
1999         void *addr;
2000         int ret;
2001
2002         rte_spinlock_lock(&avp->lock);
2003         if (avp->flags & AVP_F_DETACHED) {
2004                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2005                 ret = -ENOTSUP;
2006                 goto unlock;
2007         }
2008
2009         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2010         host_info = (struct rte_avp_device_info *)addr;
2011
2012         /* Setup required number of queues */
2013         _avp_set_queue_counts(eth_dev);
2014
2015         mask = (ETH_VLAN_STRIP_MASK |
2016                 ETH_VLAN_FILTER_MASK |
2017                 ETH_VLAN_EXTEND_MASK);
2018         avp_vlan_offload_set(eth_dev, mask);
2019
2020         /* update device config */
2021         memset(&config, 0, sizeof(config));
2022         config.device_id = host_info->device_id;
2023         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2024         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2025         config.features = avp->features;
2026         config.num_tx_queues = avp->num_tx_queues;
2027         config.num_rx_queues = avp->num_rx_queues;
2028
2029         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2030         if (ret < 0) {
2031                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2032                             ret);
2033                 goto unlock;
2034         }
2035
2036         avp->flags |= AVP_F_CONFIGURED;
2037         ret = 0;
2038
2039 unlock:
2040         rte_spinlock_unlock(&avp->lock);
2041         return ret;
2042 }
2043
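/*
 * Bring the link up.  Receive offloads that the driver does not support are
 * cleared from the device configuration before the link state change is
 * sent to the host.
 */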
2044 static int
2045 avp_dev_start(struct rte_eth_dev *eth_dev)
2046 {
2047         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2048         int ret;
2049
2050         rte_spinlock_lock(&avp->lock);
2051         if (avp->flags & AVP_F_DETACHED) {
2052                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2053                 ret = -ENOTSUP;
2054                 goto unlock;
2055         }
2056
2057         /* disable features that we do not support */
2058         eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2059         eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2060         eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2061         eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2062
2063         /* update link state */
2064         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2065         if (ret < 0) {
2066                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2067                             ret);
2068                 goto unlock;
2069         }
2070
2071         /* remember current link state */
2072         avp->flags |= AVP_F_LINKUP;
2073
2074         ret = 0;
2075
2076 unlock:
2077         rte_spinlock_unlock(&avp->lock);
2078         return ret;
2079 }
2080
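/* Bring the link down by sending a link state change to the host. */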
2081 static void
2082 avp_dev_stop(struct rte_eth_dev *eth_dev)
2083 {
2084         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2085         int ret;
2086
2087         rte_spinlock_lock(&avp->lock);
2088         if (avp->flags & AVP_F_DETACHED) {
2089                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2090                 goto unlock;
2091         }
2092
2093         /* remember current link state */
2094         avp->flags &= ~AVP_F_LINKUP;
2095
2096         /* update link state */
2097         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2098         if (ret < 0) {
2099                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2100                             ret);
2101         }
2102
2103 unlock:
2104         rte_spinlock_unlock(&avp->lock);
2105 }
2106
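/*
 * Shut the device down: clear the link and configured state, disable
 * interrupts and ask the host to release its side of the device.  Failures
 * are logged but do not abort the close.
 */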
2107 static void
2108 avp_dev_close(struct rte_eth_dev *eth_dev)
2109 {
2110         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2111         int ret;
2112
2113         rte_spinlock_lock(&avp->lock);
2114         if (avp->flags & AVP_F_DETACHED) {
2115                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2116                 goto unlock;
2117         }
2118
2119         /* remember current link state */
2120         avp->flags &= ~AVP_F_LINKUP;
2121         avp->flags &= ~AVP_F_CONFIGURED;
2122
2123         ret = avp_dev_disable_interrupts(eth_dev);
2124         if (ret < 0) {
2125                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2126                 /* continue */
2127         }
2128
2129         /* update device state */
2130         ret = avp_dev_ctrl_shutdown(eth_dev);
2131         if (ret < 0) {
2132                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2133                             ret);
2134                 /* continue */
2135         }
2136
2137 unlock:
2138         rte_spinlock_unlock(&avp->lock);
2139 }
2140
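/* Report a fixed 10G full-duplex link whose status follows AVP_F_LINKUP. */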
2141 static int
2142 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2143                                         __rte_unused int wait_to_complete)
2144 {
2145         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2146         struct rte_eth_link *link = &eth_dev->data->dev_link;
2147
2148         link->link_speed = ETH_SPEED_NUM_10G;
2149         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2150         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2151
2152         return -1;
2153 }
2154
2155 static void
2156 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2157 {
2158         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2159
2160         rte_spinlock_lock(&avp->lock);
2161         if ((avp->flags & AVP_F_PROMISC) == 0) {
2162                 avp->flags |= AVP_F_PROMISC;
2163                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2164                             eth_dev->data->port_id);
2165         }
2166         rte_spinlock_unlock(&avp->lock);
2167 }
2168
2169 static void
2170 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2171 {
2172         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2173
2174         rte_spinlock_lock(&avp->lock);
2175         if ((avp->flags & AVP_F_PROMISC) != 0) {
2176                 avp->flags &= ~AVP_F_PROMISC;
2177                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2178                             eth_dev->data->port_id);
2179         }
2180         rte_spinlock_unlock(&avp->lock);
2181 }
2182
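/* Report device limits and the VLAN offloads available from the host. */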
2183 static void
2184 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2185                  struct rte_eth_dev_info *dev_info)
2186 {
2187         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2188
2189         dev_info->driver_name = "rte_avp_pmd";
2190         dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2191         dev_info->max_rx_queues = avp->max_rx_queues;
2192         dev_info->max_tx_queues = avp->max_tx_queues;
2193         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2194         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2195         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2196         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2197                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2198                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2199         }
2200 }
2201
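/*
 * Apply VLAN offload changes.  Only VLAN stripping is supported, and only
 * when the host advertises RTE_AVP_FEATURE_VLAN_OFFLOAD; filter and extend
 * requests are logged as unsupported.
 */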
2202 static void
2203 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2204 {
2205         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2206
2207         if (mask & ETH_VLAN_STRIP_MASK) {
2208                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2209                         if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2210                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2211                         else
2212                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2213                 } else {
2214                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2215                 }
2216         }
2217
2218         if (mask & ETH_VLAN_FILTER_MASK) {
2219                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2220                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2221         }
2222
2223         if (mask & ETH_VLAN_EXTEND_MASK) {
2224                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2225                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2226         }
2227 }
2228
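/* Aggregate the per-queue counters into the standard ethdev statistics. */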
2229 static void
2230 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2231 {
2232         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2233         unsigned int i;
2234
2235         for (i = 0; i < avp->num_rx_queues; i++) {
2236                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2237
2238                 if (rxq) {
2239                         stats->ipackets += rxq->packets;
2240                         stats->ibytes += rxq->bytes;
2241                         stats->ierrors += rxq->errors;
2242
2243                         stats->q_ipackets[i] += rxq->packets;
2244                         stats->q_ibytes[i] += rxq->bytes;
2245                         stats->q_errors[i] += rxq->errors;
2246                 }
2247         }
2248
2249         for (i = 0; i < avp->num_tx_queues; i++) {
2250                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2251
2252                 if (txq) {
2253                         stats->opackets += txq->packets;
2254                         stats->obytes += txq->bytes;
2255                         stats->oerrors += txq->errors;
2256
2257                         stats->q_opackets[i] += txq->packets;
2258                         stats->q_obytes[i] += txq->bytes;
2259                         stats->q_errors[i] += txq->errors;
2260                 }
2261         }
2262 }
2263
2264 static void
2265 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2266 {
2267         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2268         unsigned int i;
2269
2270         for (i = 0; i < avp->num_rx_queues; i++) {
2271                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2272
2273                 if (rxq) {
2274                         rxq->bytes = 0;
2275                         rxq->packets = 0;
2276                         rxq->errors = 0;
2277                 }
2278         }
2279
2280         for (i = 0; i < avp->num_tx_queues; i++) {
2281                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2282
2283                 if (txq) {
2284                         txq->bytes = 0;
2285                         txq->packets = 0;
2286                         txq->errors = 0;
2287                 }
2288         }
2289 }
2290
2291 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv);
2292 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);