drivers/net/avp/avp_ethdev.c
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_memzone.h>
44 #include <rte_malloc.h>
45 #include <rte_atomic.h>
46 #include <rte_branch_prediction.h>
47 #include <rte_pci.h>
48 #include <rte_bus_pci.h>
49 #include <rte_ether.h>
50 #include <rte_common.h>
51 #include <rte_cycles.h>
52 #include <rte_spinlock.h>
53 #include <rte_byteorder.h>
54 #include <rte_dev.h>
55 #include <rte_memory.h>
56 #include <rte_eal.h>
57 #include <rte_io.h>
58
59 #include "rte_avp_common.h"
60 #include "rte_avp_fifo.h"
61
62 #include "avp_logs.h"
63
64
65 static int avp_dev_create(struct rte_pci_device *pci_dev,
66                           struct rte_eth_dev *eth_dev);
67
68 static int avp_dev_configure(struct rte_eth_dev *dev);
69 static int avp_dev_start(struct rte_eth_dev *dev);
70 static void avp_dev_stop(struct rte_eth_dev *dev);
71 static void avp_dev_close(struct rte_eth_dev *dev);
72 static void avp_dev_info_get(struct rte_eth_dev *dev,
73                              struct rte_eth_dev_info *dev_info);
74 static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
75 static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
76 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
77 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
78
79 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
80                                   uint16_t rx_queue_id,
81                                   uint16_t nb_rx_desc,
82                                   unsigned int socket_id,
83                                   const struct rte_eth_rxconf *rx_conf,
84                                   struct rte_mempool *pool);
85
86 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
87                                   uint16_t tx_queue_id,
88                                   uint16_t nb_tx_desc,
89                                   unsigned int socket_id,
90                                   const struct rte_eth_txconf *tx_conf);
91
92 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
93                                         struct rte_mbuf **rx_pkts,
94                                         uint16_t nb_pkts);
95
96 static uint16_t avp_recv_pkts(void *rx_queue,
97                               struct rte_mbuf **rx_pkts,
98                               uint16_t nb_pkts);
99
100 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
101                                         struct rte_mbuf **tx_pkts,
102                                         uint16_t nb_pkts);
103
104 static uint16_t avp_xmit_pkts(void *tx_queue,
105                               struct rte_mbuf **tx_pkts,
106                               uint16_t nb_pkts);
107
108 static void avp_dev_rx_queue_release(void *rxq);
109 static void avp_dev_tx_queue_release(void *txq);
110
111 static int avp_dev_stats_get(struct rte_eth_dev *dev,
112                               struct rte_eth_stats *stats);
113 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
114
115
116 #define AVP_MAX_RX_BURST 64
117 #define AVP_MAX_TX_BURST 64
118 #define AVP_MAX_MAC_ADDRS 1
119 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
120
121
122 /*
123  * Defines the number of microseconds to wait before checking the response
124  * queue for completion.
125  */
126 #define AVP_REQUEST_DELAY_USECS (5000)
127
128 /*
129  * Defines the number of times to check the response queue for completion before
130  * declaring a timeout.
131  */
132 #define AVP_MAX_REQUEST_RETRY (100)
133
134 /* Defines the current PCI driver version number */
135 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
136
137 /*
138  * The set of PCI devices this driver supports
139  */
140 static const struct rte_pci_id pci_id_avp_map[] = {
141         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
142           .device_id = RTE_AVP_PCI_DEVICE_ID,
143           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
144           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
145           .class_id = RTE_CLASS_ANY_ID,
146         },
147
148         { .vendor_id = 0, /* sentinel */
149         },
150 };
151
152 /*
153  * dev_ops for avp, bare necessities for basic operation
154  */
155 static const struct eth_dev_ops avp_eth_dev_ops = {
156         .dev_configure       = avp_dev_configure,
157         .dev_start           = avp_dev_start,
158         .dev_stop            = avp_dev_stop,
159         .dev_close           = avp_dev_close,
160         .dev_infos_get       = avp_dev_info_get,
161         .vlan_offload_set    = avp_vlan_offload_set,
162         .stats_get           = avp_dev_stats_get,
163         .stats_reset         = avp_dev_stats_reset,
164         .link_update         = avp_dev_link_update,
165         .promiscuous_enable  = avp_dev_promiscuous_enable,
166         .promiscuous_disable = avp_dev_promiscuous_disable,
167         .rx_queue_setup      = avp_dev_rx_queue_setup,
168         .rx_queue_release    = avp_dev_rx_queue_release,
169         .tx_queue_setup      = avp_dev_tx_queue_setup,
170         .tx_queue_release    = avp_dev_tx_queue_release,
171 };
172
173 /**@{ AVP device flags */
174 #define AVP_F_PROMISC (1 << 1)
175 #define AVP_F_CONFIGURED (1 << 2)
176 #define AVP_F_LINKUP (1 << 3)
177 #define AVP_F_DETACHED (1 << 4)
178 /**@} */
179
180 /* Ethernet device validation marker */
181 #define AVP_ETHDEV_MAGIC 0x92972862
182
183 /*
184  * Defines the AVP device attributes which are attached to an RTE ethernet
185  * device
186  */
187 struct avp_dev {
188         uint32_t magic; /**< Memory validation marker */
189         uint64_t device_id; /**< Unique system identifier */
190         struct ether_addr ethaddr; /**< Host specified MAC address */
191         struct rte_eth_dev_data *dev_data;
192         /**< Back pointer to ethernet device data */
193         volatile uint32_t flags; /**< Device operational flags */
194         uint16_t port_id; /**< Ethernet port identifier */
195         struct rte_mempool *pool; /**< pkt mbuf mempool */
196         unsigned int guest_mbuf_size; /**< local pool mbuf size */
197         unsigned int host_mbuf_size; /**< host mbuf size */
198         unsigned int max_rx_pkt_len; /**< maximum receive unit */
199         uint32_t host_features; /**< Supported feature bitmap */
200         uint32_t features; /**< Enabled feature bitmap */
201         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
202         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
203         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
204         unsigned int max_rx_queues; /**< Maximum number of receive queues */
205
206         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
207         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
208         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
209         /**< Allocated mbufs queue */
210         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
211         /**< To be freed mbufs queue */
212
213         /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
214         rte_spinlock_t lock;
215
216         /* For request & response */
217         struct rte_avp_fifo *req_q; /**< Request queue */
218         struct rte_avp_fifo *resp_q; /**< Response queue */
219         void *host_sync_addr; /**< (host) Req/Resp Mem address */
220         void *sync_addr; /**< Req/Resp Mem address */
221         void *host_mbuf_addr; /**< (host) MBUF pool start address */
222         void *mbuf_addr; /**< MBUF pool start address */
223 } __rte_cache_aligned;
224
225 /* RTE ethernet private data */
226 struct avp_adapter {
227         struct avp_dev avp;
228 } __rte_cache_aligned;
229
230
231 /* 32-bit MMIO register write */
232 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
233
234 /* 32-bit MMIO register read */
235 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
236
237 /* Macro to cast the ethernet device private data to an AVP object */
238 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
239         (&((struct avp_adapter *)adapter)->avp)
240
241 /*
242  * Defines the structure of an AVP device queue for the purpose of handling the
243  * receive and transmit burst callback functions
244  */
245 struct avp_queue {
246         struct rte_eth_dev_data *dev_data;
247         /**< Backpointer to ethernet device data */
248         struct avp_dev *avp; /**< Backpointer to AVP device */
249         uint16_t queue_id;
250         /**< Queue identifier used for indexing current queue */
251         uint16_t queue_base;
252         /**< Base queue identifier for queue servicing */
253         uint16_t queue_limit;
254         /**< Maximum queue identifier for queue servicing */
255
256         uint64_t packets;
257         uint64_t bytes;
258         uint64_t errors;
259 };
260
261 /* send a request and wait for a response
262  *
263  * @warning must be called while holding the avp->lock spinlock.
264  */
265 static int
266 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
267 {
268         unsigned int retry = AVP_MAX_REQUEST_RETRY;
269         void *resp_addr = NULL;
270         unsigned int count;
271         int ret;
272
273         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
274
275         request->result = -ENOTSUP;
276
277         /* Discard any stale responses before starting a new request */
278         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
279                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
280
281         rte_memcpy(avp->sync_addr, request, sizeof(*request));
282         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
283         if (count < 1) {
284                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
285                             request->req_id);
286                 ret = -EBUSY;
287                 goto done;
288         }
289
290         while (retry--) {
291                 /* wait for a response */
292                 usleep(AVP_REQUEST_DELAY_USECS);
293
294                 count = avp_fifo_count(avp->resp_q);
295                 if (count >= 1) {
296                         /* response received */
297                         break;
298                 }
299
300                 if ((count < 1) && (retry == 0)) {
301                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
302                                     request->req_id);
303                         ret = -ETIME;
304                         goto done;
305                 }
306         }
307
308         /* retrieve the response */
309         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
310         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
311                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
312                             count, resp_addr, avp->host_sync_addr);
313                 ret = -ENODATA;
314                 goto done;
315         }
316
317         /* copy to user buffer */
318         rte_memcpy(request, avp->sync_addr, sizeof(*request));
319         ret = 0;
320
321         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
322                     request->result, request->req_id);
323
324 done:
325         return ret;
326 }
327
328 static int
329 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
330 {
331         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
332         struct rte_avp_request request;
333         int ret;
334
335         /* setup a link state change request */
336         memset(&request, 0, sizeof(request));
337         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
338         request.if_up = state;
339
340         ret = avp_dev_process_request(avp, &request);
341
342         return ret == 0 ? request.result : ret;
343 }
344
345 static int
346 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
347                         struct rte_avp_device_config *config)
348 {
349         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
350         struct rte_avp_request request;
351         int ret;
352
353         /* setup a configure request */
354         memset(&request, 0, sizeof(request));
355         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
356         memcpy(&request.config, config, sizeof(request.config));
357
358         ret = avp_dev_process_request(avp, &request);
359
360         return ret == 0 ? request.result : ret;
361 }
362
363 static int
364 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
365 {
366         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
367         struct rte_avp_request request;
368         int ret;
369
370         /* setup a shutdown request */
371         memset(&request, 0, sizeof(request));
372         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
373
374         ret = avp_dev_process_request(avp, &request);
375
376         return ret == 0 ? request.result : ret;
377 }
378
379 /* translate from host mbuf virtual address to guest virtual address */
380 static inline void *
381 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
382 {
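        /*
         * The host and guest map the same mbuf pool at different virtual
         * addresses; apply the buffer's offset within the host pool to the
         * guest-side base address to recover the local pointer.
         */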
383         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
384                                        (uintptr_t)avp->host_mbuf_addr),
385                            (uintptr_t)avp->mbuf_addr);
386 }
387
388 /* translate from host physical address to guest virtual address */
389 static void *
390 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
391                           phys_addr_t host_phys_addr)
392 {
393         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
394         struct rte_mem_resource *resource;
395         struct rte_avp_memmap_info *info;
396         struct rte_avp_memmap *map;
397         off_t offset;
398         void *addr;
399         unsigned int i;
400
401         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
402         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
403         info = (struct rte_avp_memmap_info *)resource->addr;
404
405         offset = 0;
406         for (i = 0; i < info->nb_maps; i++) {
407                 /* search all segments looking for a matching address */
408                 map = &info->maps[i];
409
410                 if ((host_phys_addr >= map->phys_addr) &&
411                         (host_phys_addr < (map->phys_addr + map->length))) {
412                         /* address is within this segment */
413                         offset += (host_phys_addr - map->phys_addr);
414                         addr = RTE_PTR_ADD(addr, offset);
415
416                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
417                                     host_phys_addr, addr);
418
419                         return addr;
420                 }
421                 offset += map->length;
422         }
423
424         return NULL;
425 }
426
427 /* verify that the incoming device version is compatible with our version */
428 static int
429 avp_dev_version_check(uint32_t version)
430 {
431         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
432         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
433
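        /*
         * The minor version component is ignored; the host device is only
         * rejected when its stripped version is newer than this driver's.
         */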
434         if (device <= driver) {
435                 /* the host driver version is less than or equal to ours */
436                 return 0;
437         }
438
439         return 1;
440 }
441
442 /* verify that memory regions have expected version and validation markers */
443 static int
444 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
445 {
446         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
447         struct rte_avp_memmap_info *memmap;
448         struct rte_avp_device_info *info;
449         struct rte_mem_resource *resource;
450         unsigned int i;
451
452         /* Dump resource info for debug */
453         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
454                 resource = &pci_dev->mem_resource[i];
455                 if ((resource->phys_addr == 0) || (resource->len == 0))
456                         continue;
457
458                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
459                             i, resource->phys_addr,
460                             resource->len, resource->addr);
461
462                 switch (i) {
463                 case RTE_AVP_PCI_MEMMAP_BAR:
464                         memmap = (struct rte_avp_memmap_info *)resource->addr;
465                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
466                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
467                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
468                                             memmap->magic, memmap->version);
469                                 return -EINVAL;
470                         }
471                         break;
472
473                 case RTE_AVP_PCI_DEVICE_BAR:
474                         info = (struct rte_avp_device_info *)resource->addr;
475                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
476                             avp_dev_version_check(info->version)) {
477                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
478                                             info->magic, info->version,
479                                             AVP_DPDK_DRIVER_VERSION);
480                                 return -EINVAL;
481                         }
482                         break;
483
484                 case RTE_AVP_PCI_MEMORY_BAR:
485                 case RTE_AVP_PCI_MMIO_BAR:
486                         if (resource->addr == NULL) {
487                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
488                                             i);
489                                 return -EINVAL;
490                         }
491                         break;
492
493                 case RTE_AVP_PCI_MSIX_BAR:
494                 default:
495                         /* no validation required */
496                         break;
497                 }
498         }
499
500         return 0;
501 }
502
503 static int
504 avp_dev_detach(struct rte_eth_dev *eth_dev)
505 {
506         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
507         int ret;
508
509         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
510                     eth_dev->data->port_id, avp->device_id);
511
512         rte_spinlock_lock(&avp->lock);
513
514         if (avp->flags & AVP_F_DETACHED) {
515                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
516                             eth_dev->data->port_id);
517                 ret = 0;
518                 goto unlock;
519         }
520
521         /* shutdown the device first so the host stops sending us packets. */
522         ret = avp_dev_ctrl_shutdown(eth_dev);
523         if (ret < 0) {
524                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
525                             ret);
526                 avp->flags &= ~AVP_F_DETACHED;
527                 goto unlock;
528         }
529
530         avp->flags |= AVP_F_DETACHED;
531         rte_wmb();
532
533         /* wait for queues to acknowledge the presence of the detach flag */
534         rte_delay_ms(1);
535
536         ret = 0;
537
538 unlock:
539         rte_spinlock_unlock(&avp->lock);
540         return ret;
541 }
542
543 static void
544 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
545 {
546         struct avp_dev *avp =
547                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
548         struct avp_queue *rxq;
549         uint16_t queue_count;
550         uint16_t remainder;
551
552         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
553
554         /*
555          * Must map all AVP fifos as evenly as possible between the configured
556          * device queues.  Each device queue will service a subset of the AVP
557          * fifos.  If the AVP fifos do not divide evenly among the device
558          * queues, the first device queues will each service one extra fifo.
559          */
560         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
561         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
562         if (rx_queue_id < remainder) {
563                 /* these queues must service one extra FIFO */
564                 rxq->queue_base = rx_queue_id * (queue_count + 1);
565                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
566         } else {
567                 /* these queues service the regular number of FIFOs */
568                 rxq->queue_base = ((remainder * (queue_count + 1)) +
569                                    ((rx_queue_id - remainder) * queue_count));
570                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
571         }
572
573         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
574                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
575
576         rxq->queue_id = rxq->queue_base;
577 }
578
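/*
 * Negotiate the number of transmit and receive queues with the host based on
 * the application's queue configuration and the host's minimum requirements.
 */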
579 static void
580 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
581 {
582         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
583         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
584         struct rte_avp_device_info *host_info;
585         void *addr;
586
587         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
588         host_info = (struct rte_avp_device_info *)addr;
589
590         /*
591          * the transmit direction is not negotiated beyond respecting the max
592          * number of queues because the host can handle arbitrary guest tx
593          * queues (host rx queues).
594          */
595         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
596
597         /*
598          * the receive direction is more restrictive.  The host requires a
599          * minimum number of guest rx queues (host tx queues) therefore
600          * negotiate a value that is at least as large as the host minimum
601          * requirement.  If the host and guest values are not identical then a
602          * mapping will be established in the receive_queue_setup function.
603          */
604         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
605                                      eth_dev->data->nb_rx_queues);
606
607         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
608                     avp->num_tx_queues, avp->num_rx_queues);
609 }
610
611 static int
612 avp_dev_attach(struct rte_eth_dev *eth_dev)
613 {
614         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
615         struct rte_avp_device_config config;
616         unsigned int i;
617         int ret;
618
619         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
620                     eth_dev->data->port_id, avp->device_id);
621
622         rte_spinlock_lock(&avp->lock);
623
624         if (!(avp->flags & AVP_F_DETACHED)) {
625                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
626                             eth_dev->data->port_id);
627                 ret = 0;
628                 goto unlock;
629         }
630
631         /*
632          * make sure that the detached flag is set prior to reconfiguring the
633          * queues.
634          */
635         avp->flags |= AVP_F_DETACHED;
636         rte_wmb();
637
638         /*
639          * re-run the device create utility which will parse the new host info
640          * and setup the AVP device queue pointers.
641          */
642         ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
643         if (ret < 0) {
644                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
645                             ret);
646                 goto unlock;
647         }
648
649         if (avp->flags & AVP_F_CONFIGURED) {
650                 /*
651                  * Update the receive queue mapping to handle cases where the
652                  * source and destination hosts have different queue
653                  * requirements.  As long as the DETACHED flag is asserted the
654                  * queue table should not be referenced so it should be safe to
655                  * update it.
656                  */
657                 _avp_set_queue_counts(eth_dev);
658                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
659                         _avp_set_rx_queue_mappings(eth_dev, i);
660
661                 /*
662                  * Update the host with our config details so that it knows the
663                  * device is active.
664                  */
665                 memset(&config, 0, sizeof(config));
666                 config.device_id = avp->device_id;
667                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
668                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
669                 config.features = avp->features;
670                 config.num_tx_queues = avp->num_tx_queues;
671                 config.num_rx_queues = avp->num_rx_queues;
672                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
673
674                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
675                 if (ret < 0) {
676                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
677                                     ret);
678                         goto unlock;
679                 }
680         }
681
682         rte_wmb();
683         avp->flags &= ~AVP_F_DETACHED;
684
685         ret = 0;
686
687 unlock:
688         rte_spinlock_unlock(&avp->lock);
689         return ret;
690 }
691
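/*
 * UIO interrupt callback.  Services host migration notifications by detaching
 * or re-attaching the device and acknowledges the result via the MMIO ack
 * register before re-enabling interrupts.
 */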
692 static void
693 avp_dev_interrupt_handler(void *data)
694 {
695         struct rte_eth_dev *eth_dev = data;
696         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
697         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
698         uint32_t status, value;
699         int ret;
700
701         if (registers == NULL)
702                 rte_panic("no mapped MMIO register space\n");
703
704         /* read the interrupt status register
705          * note: this register clears on read so all raised interrupts must be
706          *    handled or remembered for later processing
707          */
708         status = AVP_READ32(
709                 RTE_PTR_ADD(registers,
710                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
711
712         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
713                 /* handle interrupt based on current status */
714                 value = AVP_READ32(
715                         RTE_PTR_ADD(registers,
716                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
717                 switch (value) {
718                 case RTE_AVP_MIGRATION_DETACHED:
719                         ret = avp_dev_detach(eth_dev);
720                         break;
721                 case RTE_AVP_MIGRATION_ATTACHED:
722                         ret = avp_dev_attach(eth_dev);
723                         break;
724                 default:
725                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
726                                     value);
727                         ret = -EINVAL;
728                 }
729
730                 /* acknowledge the request by writing out our current status */
731                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
732                 AVP_WRITE32(value,
733                             RTE_PTR_ADD(registers,
734                                         RTE_AVP_MIGRATION_ACK_OFFSET));
735
736                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
737         }
738
739         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
740                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
741                             status);
742
743         /* re-enable UIO interrupt handling */
744         ret = rte_intr_enable(&pci_dev->intr_handle);
745         if (ret < 0) {
746                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
747                             ret);
748                 /* continue */
749         }
750 }
751
752 static int
753 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
754 {
755         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
756         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
757         int ret;
758
759         if (registers == NULL)
760                 return -EINVAL;
761
762         /* enable UIO interrupt handling */
763         ret = rte_intr_enable(&pci_dev->intr_handle);
764         if (ret < 0) {
765                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
766                             ret);
767                 return ret;
768         }
769
770         /* inform the device that all interrupts are enabled */
771         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
772                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
773
774         return 0;
775 }
776
777 static int
778 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
779 {
780         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
781         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
782         int ret;
783
784         if (registers == NULL)
785                 return 0;
786
787         /* inform the device that all interrupts are disabled */
788         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
789                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
790
791         /* disable UIO interrupt handling */
792         ret = rte_intr_disable(&pci_dev->intr_handle);
793         if (ret < 0) {
794                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
795                             ret);
796                 return ret;
797         }
798
799         return 0;
800 }
801
802 static int
803 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
804 {
805         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
806         int ret;
807
808         /* register a callback handler with UIO for interrupt notifications */
809         ret = rte_intr_callback_register(&pci_dev->intr_handle,
810                                          avp_dev_interrupt_handler,
811                                          (void *)eth_dev);
812         if (ret < 0) {
813                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
814                             ret);
815                 return ret;
816         }
817
818         /* enable interrupt processing */
819         return avp_dev_enable_interrupts(eth_dev);
820 }
821
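/*
 * Returns non-zero if a VM live migration is in progress; in that case the
 * pending detach is acknowledged to the host via the MMIO ack register.
 */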
822 static int
823 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
824 {
825         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
826         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
827         uint32_t value;
828
829         if (registers == NULL)
830                 return 0;
831
832         value = AVP_READ32(RTE_PTR_ADD(registers,
833                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
834         if (value == RTE_AVP_MIGRATION_DETACHED) {
835                 /* migration is in progress; ack it if we have not already */
836                 AVP_WRITE32(value,
837                             RTE_PTR_ADD(registers,
838                                         RTE_AVP_MIGRATION_ACK_OFFSET));
839                 return 1;
840         }
841         return 0;
842 }
843
844 /*
845  * create an AVP device using the supplied device info by first translating it
846  * to guest address space(s).
847  */
848 static int
849 avp_dev_create(struct rte_pci_device *pci_dev,
850                struct rte_eth_dev *eth_dev)
851 {
852         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
853         struct rte_avp_device_info *host_info;
854         struct rte_mem_resource *resource;
855         unsigned int i;
856
857         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
858         if (resource->addr == NULL) {
859                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
860                             RTE_AVP_PCI_DEVICE_BAR);
861                 return -EFAULT;
862         }
863         host_info = (struct rte_avp_device_info *)resource->addr;
864
865         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
866                 avp_dev_version_check(host_info->version)) {
867                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
868                             host_info->magic, host_info->version,
869                             AVP_DPDK_DRIVER_VERSION);
870                 return -EINVAL;
871         }
872
873         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
874                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
875                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
876                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
877
878         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
879                     host_info->min_tx_queues, host_info->max_tx_queues);
880         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
881                     host_info->min_rx_queues, host_info->max_rx_queues);
882         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
883                     host_info->features);
884
885         if (avp->magic != AVP_ETHDEV_MAGIC) {
886                 /*
887                  * First time initialization (i.e., not during a VM
888                  * migration)
889                  */
890                 memset(avp, 0, sizeof(*avp));
891                 avp->magic = AVP_ETHDEV_MAGIC;
892                 avp->dev_data = eth_dev->data;
893                 avp->port_id = eth_dev->data->port_id;
894                 avp->host_mbuf_size = host_info->mbuf_size;
895                 avp->host_features = host_info->features;
896                 rte_spinlock_init(&avp->lock);
897                 memcpy(&avp->ethaddr.addr_bytes[0],
898                        host_info->ethaddr, ETHER_ADDR_LEN);
899                 /* adjust max values to not exceed our max */
900                 avp->max_tx_queues =
901                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
902                 avp->max_rx_queues =
903                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
904         } else {
905                 /* Re-attaching during migration */
906
907                 /* TODO... requires validation of host values */
908                 if ((host_info->features & avp->features) != avp->features) {
909                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
910                                     avp->features, host_info->features);
911                         /* this should not be possible; continue for now */
912                 }
913         }
914
915         /* the device id is allowed to change over migrations */
916         avp->device_id = host_info->device_id;
917
918         /* translate incoming host addresses to guest address space */
919         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
920                     host_info->tx_phys);
921         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
922                     host_info->alloc_phys);
923         for (i = 0; i < avp->max_tx_queues; i++) {
924                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
925                         host_info->tx_phys + (i * host_info->tx_size));
926
927                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
928                         host_info->alloc_phys + (i * host_info->alloc_size));
929         }
930
931         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
932                     host_info->rx_phys);
933         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
934                     host_info->free_phys);
935         for (i = 0; i < avp->max_rx_queues; i++) {
936                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
937                         host_info->rx_phys + (i * host_info->rx_size));
938                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
939                         host_info->free_phys + (i * host_info->free_size));
940         }
941
942         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
943                     host_info->req_phys);
944         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
945                     host_info->resp_phys);
946         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
947                     host_info->sync_phys);
948         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
949                     host_info->mbuf_phys);
950         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
951         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
952         avp->sync_addr =
953                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
954         avp->mbuf_addr =
955                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
956
957         /*
958          * store the host mbuf virtual address so that we can calculate
959          * relative offsets for each mbuf as they are processed
960          */
961         avp->host_mbuf_addr = host_info->mbuf_va;
962         avp->host_sync_addr = host_info->sync_va;
963
964         /*
965          * store the maximum packet length that is supported by the host.
966          */
967         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
968         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
969                                 host_info->max_rx_pkt_len);
970
971         return 0;
972 }
973
974 /*
975  * This function is based on the probe() function in avp_pci.c
976  * It returns 0 on success.
977  */
978 static int
979 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
980 {
981         struct avp_dev *avp =
982                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
983         struct rte_pci_device *pci_dev;
984         int ret;
985
986         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
987         eth_dev->dev_ops = &avp_eth_dev_ops;
988         eth_dev->rx_pkt_burst = &avp_recv_pkts;
989         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
990
991         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
992                 /*
993                  * no setup required on secondary processes.  All data is saved
994                  * in dev_private by the primary process. All resources should
995                  * be mapped to the same virtual address so all pointers should
996                  * be valid.
997                  */
998                 if (eth_dev->data->scattered_rx) {
999                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1000                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1001                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1002                 }
1003                 return 0;
1004         }
1005
1006         rte_eth_copy_pci_info(eth_dev, pci_dev);
1007
1008         /* Check current migration status */
1009         if (avp_dev_migration_pending(eth_dev)) {
1010                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1011                 return -EBUSY;
1012         }
1013
1014         /* Check BAR resources */
1015         ret = avp_dev_check_regions(eth_dev);
1016         if (ret < 0) {
1017                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1018                             ret);
1019                 return ret;
1020         }
1021
1022         /* Enable interrupts */
1023         ret = avp_dev_setup_interrupts(eth_dev);
1024         if (ret < 0) {
1025                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1026                 return ret;
1027         }
1028
1029         /* Handle each subtype */
1030         ret = avp_dev_create(pci_dev, eth_dev);
1031         if (ret < 0) {
1032                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1033                 return ret;
1034         }
1035
1036         /* Allocate memory for storing MAC addresses */
1037         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1038         if (eth_dev->data->mac_addrs == NULL) {
1039                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1040                             ETHER_ADDR_LEN);
1041                 return -ENOMEM;
1042         }
1043
1044         /* Get the MAC address from the device config */
1045         ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1046
1047         return 0;
1048 }
1049
1050 static int
1051 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1052 {
1053         int ret;
1054
1055         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1056                 return -EPERM;
1057
1058         if (eth_dev->data == NULL)
1059                 return 0;
1060
1061         ret = avp_dev_disable_interrupts(eth_dev);
1062         if (ret != 0) {
1063                 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1064                 return ret;
1065         }
1066
1067         if (eth_dev->data->mac_addrs != NULL) {
1068                 rte_free(eth_dev->data->mac_addrs);
1069                 eth_dev->data->mac_addrs = NULL;
1070         }
1071
1072         return 0;
1073 }
1074
1075 static int
1076 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1077                   struct rte_pci_device *pci_dev)
1078 {
1079         struct rte_eth_dev *eth_dev;
1080         int ret;
1081
1082         eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1083                                            sizeof(struct avp_adapter));
1084         if (eth_dev == NULL)
1085                 return -ENOMEM;
1086
1087         ret = eth_avp_dev_init(eth_dev);
1088         if (ret)
1089                 rte_eth_dev_pci_release(eth_dev);
1090
1091         return ret;
1092 }
1093
1094 static int
1095 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1096 {
1097         return rte_eth_dev_pci_generic_remove(pci_dev,
1098                                               eth_avp_dev_uninit);
1099 }
1100
1101 static struct rte_pci_driver rte_avp_pmd = {
1102         .id_table = pci_id_avp_map,
1103         .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1104         .probe = eth_avp_pci_probe,
1105         .remove = eth_avp_pci_remove,
1106 };
1107
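/*
 * Determine whether chained (scattered) mbufs must be used based on the
 * configured maximum packet lengths and the host/guest mbuf sizes; returns
 * non-zero when the scattered receive/transmit handlers are required.
 */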
1108 static int
1109 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1110                          struct avp_dev *avp)
1111 {
1112         unsigned int max_rx_pkt_len;
1113
1114         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1115
1116         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1117             (max_rx_pkt_len > avp->host_mbuf_size)) {
1118                 /*
1119                  * If the guest MTU is greater than either the host or guest mbuf
1120                  * size then chained mbufs have to be enabled in the TX
1121                  * direction.  It is assumed that the application will not need
1122                  * to send packets larger than their max_rx_pkt_len (MRU).
1123                  */
1124                 return 1;
1125         }
1126
1127         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1128             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1129                 /*
1130                  * If the host MRU is greater than its own mbuf size or the
1131                  * guest mbuf size then chained mbufs have to be enabled in the
1132                  * RX direction.
1133                  */
1134                 return 1;
1135         }
1136
1137         return 0;
1138 }
1139
1140 static int
1141 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1142                        uint16_t rx_queue_id,
1143                        uint16_t nb_rx_desc,
1144                        unsigned int socket_id,
1145                        const struct rte_eth_rxconf *rx_conf,
1146                        struct rte_mempool *pool)
1147 {
1148         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1149         struct rte_pktmbuf_pool_private *mbp_priv;
1150         struct avp_queue *rxq;
1151
1152         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1153                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1154                             rx_queue_id, eth_dev->data->nb_rx_queues);
1155                 return -EINVAL;
1156         }
1157
1158         /* Save mbuf pool pointer */
1159         avp->pool = pool;
1160
1161         /* Save the local mbuf size */
1162         mbp_priv = rte_mempool_get_priv(pool);
1163         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1164         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1165
1166         if (avp_dev_enable_scattered(eth_dev, avp)) {
1167                 if (!eth_dev->data->scattered_rx) {
1168                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1169                         eth_dev->data->scattered_rx = 1;
1170                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1171                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1172                 }
1173         }
1174
1175         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1176                     avp->max_rx_pkt_len,
1177                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1178                     avp->host_mbuf_size,
1179                     avp->guest_mbuf_size);
1180
1181         /* allocate a queue object */
1182         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1183                                  RTE_CACHE_LINE_SIZE, socket_id);
1184         if (rxq == NULL) {
1185                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1186                 return -ENOMEM;
1187         }
1188
1189         /* save back pointers to AVP and Ethernet devices */
1190         rxq->avp = avp;
1191         rxq->dev_data = eth_dev->data;
1192         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1193
1194         /* setup the queue receive mapping for the current queue. */
1195         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1196
1197         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1198
1199         (void)nb_rx_desc;
1200         (void)rx_conf;
1201         return 0;
1202 }
1203
1204 static int
1205 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1206                        uint16_t tx_queue_id,
1207                        uint16_t nb_tx_desc,
1208                        unsigned int socket_id,
1209                        const struct rte_eth_txconf *tx_conf)
1210 {
1211         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1212         struct avp_queue *txq;
1213
1214         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1215                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1216                             tx_queue_id, eth_dev->data->nb_tx_queues);
1217                 return -EINVAL;
1218         }
1219
1220         /* allocate a queue object */
1221         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1222                                  RTE_CACHE_LINE_SIZE, socket_id);
1223         if (txq == NULL) {
1224                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1225                 return -ENOMEM;
1226         }
1227
1228         /* only the configured set of transmit queues are used */
1229         txq->queue_id = tx_queue_id;
1230         txq->queue_base = tx_queue_id;
1231         txq->queue_limit = tx_queue_id;
1232
1233         /* save back pointers to AVP and Ethernet devices */
1234         txq->avp = avp;
1235         txq->dev_data = eth_dev->data;
1236         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1237
1238         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1239
1240         (void)nb_tx_desc;
1241         (void)tx_conf;
1242         return 0;
1243 }
1244
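/*
 * Compare two Ethernet addresses as three 16-bit words; returns zero when the
 * addresses are identical and non-zero otherwise.
 */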
1245 static inline int
1246 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1247 {
1248         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1249         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1250         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1251 }
1252
1253 static inline int
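/*
 * Decide whether a received packet should be accepted.  Returns 0 for packets
 * addressed to this device, broadcast/multicast packets, or any packet while
 * in promiscuous mode; returns -1 when the packet should be dropped.
 */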
1254 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1255 {
1256         struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1257
1258         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1259                 /* allow all packets destined to our address */
1260                 return 0;
1261         }
1262
1263         if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1264                 /* allow all broadcast packets */
1265                 return 0;
1266         }
1267
1268         if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1269                 /* allow all multicast packets */
1270                 return 0;
1271         }
1272
1273         if (avp->flags & AVP_F_PROMISC) {
1274                 /* allow all packets when in promiscuous mode */
1275                 return 0;
1276         }
1277
1278         return -1;
1279 }
1280
1281 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1282 static inline void
1283 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1284 {
1285         struct rte_avp_desc *first_buf;
1286         struct rte_avp_desc *pkt_buf;
1287         unsigned int pkt_len;
1288         unsigned int nb_segs;
1289         void *pkt_data;
1290         unsigned int i;
1291
1292         first_buf = avp_dev_translate_buffer(avp, buf);
1293
1294         i = 0;
1295         pkt_len = 0;
1296         nb_segs = first_buf->nb_segs;
1297         do {
1298                 /* Adjust pointers for guest addressing */
1299                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1300                 if (pkt_buf == NULL)
1301                         rte_panic("bad buffer: segment %u has an invalid address %p\n",
1302                                   i, buf);
1303                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1304                 if (pkt_data == NULL)
1305                         rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1306                                   i);
1307                 if (pkt_buf->data_len == 0)
1308                         rte_panic("bad buffer: segment %u has 0 data length\n",
1309                                   i);
1310                 pkt_len += pkt_buf->data_len;
1311                 nb_segs--;
1312                 i++;
1313
1314         } while (nb_segs && (buf = pkt_buf->next) != NULL);
1315
1316         if (nb_segs != 0)
1317                 rte_panic("bad buffer: expected %u segments found %u\n",
1318                           first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1319         if (pkt_len != first_buf->pkt_len)
1320                 rte_panic("bad buffer: expected length %u found %u\n",
1321                           first_buf->pkt_len, pkt_len);
1322 }
1323
1324 #define avp_dev_buffer_sanity_check(a, b) \
1325         __avp_dev_buffer_sanity_check((a), (b))
1326
1327 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1328
1329 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1330
1331 #endif
1332
1333 /*
1334  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1335  * there are exactly the required number of mbufs to copy all source bytes.
1336  */
1337 static inline struct rte_mbuf *
1338 avp_dev_copy_from_buffers(struct avp_dev *avp,
1339                           struct rte_avp_desc *buf,
1340                           struct rte_mbuf **mbufs,
1341                           unsigned int count)
1342 {
1343         struct rte_mbuf *m_previous = NULL;
1344         struct rte_avp_desc *pkt_buf;
1345         unsigned int total_length = 0;
1346         unsigned int copy_length;
1347         unsigned int src_offset;
1348         struct rte_mbuf *m;
1349         uint16_t ol_flags;
1350         uint16_t vlan_tci;
1351         void *pkt_data;
1352         unsigned int i;
1353
1354         avp_dev_buffer_sanity_check(avp, buf);
1355
1356         /* setup the first source buffer */
1357         pkt_buf = avp_dev_translate_buffer(avp, buf);
1358         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1359         total_length = pkt_buf->pkt_len;
1360         src_offset = 0;
1361
1362         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1363                 ol_flags = PKT_RX_VLAN;
1364                 vlan_tci = pkt_buf->vlan_tci;
1365         } else {
1366                 ol_flags = 0;
1367                 vlan_tci = 0;
1368         }
1369
1370         for (i = 0; (i < count) && (buf != NULL); i++) {
1371                 /* fill each destination buffer */
1372                 m = mbufs[i];
1373
1374                 if (m_previous != NULL)
1375                         m_previous->next = m;
1376
1377                 m_previous = m;
1378
1379                 do {
1380                         /*
1381                          * Copy as many source buffers as will fit in the
1382                          * destination buffer.
1383                          */
1384                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1385                                                rte_pktmbuf_data_len(m)),
1386                                               (pkt_buf->data_len -
1387                                                src_offset));
1388                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1389                                                rte_pktmbuf_data_len(m)),
1390                                    RTE_PTR_ADD(pkt_data, src_offset),
1391                                    copy_length);
1392                         rte_pktmbuf_data_len(m) += copy_length;
1393                         src_offset += copy_length;
1394
1395                         if (likely(src_offset == pkt_buf->data_len)) {
1396                                 /* need a new source buffer */
1397                                 buf = pkt_buf->next;
1398                                 if (buf != NULL) {
1399                                         pkt_buf = avp_dev_translate_buffer(
1400                                                 avp, buf);
1401                                         pkt_data = avp_dev_translate_buffer(
1402                                                 avp, pkt_buf->data);
1403                                         src_offset = 0;
1404                                 }
1405                         }
1406
1407                         if (unlikely(rte_pktmbuf_data_len(m) ==
1408                                      avp->guest_mbuf_size)) {
1409                                 /* need a new destination mbuf */
1410                                 break;
1411                         }
1412
1413                 } while (buf != NULL);
1414         }
1415
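        /* the first mbuf in the chain carries the aggregate packet metadata */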
1416         m = mbufs[0];
1417         m->ol_flags = ol_flags;
1418         m->nb_segs = count;
1419         rte_pktmbuf_pkt_len(m) = total_length;
1420         m->vlan_tci = vlan_tci;
1421
1422         __rte_mbuf_sanity_check(m, 1);
1423
1424         return m;
1425 }
1426
1427 static uint16_t
1428 avp_recv_scattered_pkts(void *rx_queue,
1429                         struct rte_mbuf **rx_pkts,
1430                         uint16_t nb_pkts)
1431 {
1432         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1433         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1434         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1435         struct avp_dev *avp = rxq->avp;
1436         struct rte_avp_desc *pkt_buf;
1437         struct rte_avp_fifo *free_q;
1438         struct rte_avp_fifo *rx_q;
1439         struct rte_avp_desc *buf;
1440         unsigned int count, avail, n;
1441         unsigned int guest_mbuf_size;
1442         struct rte_mbuf *m;
1443         unsigned int required;
1444         unsigned int buf_len;
1445         unsigned int port_id;
1446         unsigned int i;
1447
1448         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1449                 /* VM live migration in progress */
1450                 return 0;
1451         }
1452
1453         guest_mbuf_size = avp->guest_mbuf_size;
1454         port_id = avp->port_id;
1455         rx_q = avp->rx_q[rxq->queue_id];
1456         free_q = avp->free_q[rxq->queue_id];
1457
1458         /* setup next queue to service */
1459         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1460                 (rxq->queue_id + 1) : rxq->queue_base;
1461
1462         /* determine how many slots are available in the free queue */
1463         count = avp_fifo_free_count(free_q);
1464
1465         /* determine how many packets are available in the rx queue */
1466         avail = avp_fifo_count(rx_q);
1467
1468         /* determine how many packets can be received */
1469         count = RTE_MIN(count, avail);
1470         count = RTE_MIN(count, nb_pkts);
1471         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1472
1473         if (unlikely(count == 0)) {
1474                 /* no free buffers, or no buffers on the rx queue */
1475                 return 0;
1476         }
1477
1478         /* retrieve pending packets */
1479         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1480         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1481                    count, rx_q);
1482
1483         count = 0;
1484         for (i = 0; i < n; i++) {
1485                 /* prefetch next entry while processing current one */
1486                 if (i + 1 < n) {
1487                         pkt_buf = avp_dev_translate_buffer(avp,
1488                                                            avp_bufs[i + 1]);
1489                         rte_prefetch0(pkt_buf);
1490                 }
1491                 buf = avp_bufs[i];
1492
1493                 /* Peek into the first buffer to determine the total length */
1494                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1495                 buf_len = pkt_buf->pkt_len;
1496
1497                 /* Allocate enough mbufs to receive the entire packet */
1498                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1499                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1500                         rxq->dev_data->rx_mbuf_alloc_failed++;
1501                         continue;
1502                 }
1503
1504                 /* Copy the data from the buffers to our mbufs */
1505                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1506
1507                 /* finalize mbuf */
1508                 m->port = port_id;
1509
1510                 if (_avp_mac_filter(avp, m) != 0) {
1511                         /* silently discard packets not destined to our MAC */
1512                         rte_pktmbuf_free(m);
1513                         continue;
1514                 }
1515
1516                 /* return new mbuf to caller */
1517                 rx_pkts[count++] = m;
1518                 rxq->bytes += buf_len;
1519         }
1520
1521         rxq->packets += count;
1522
1523         /* return the buffers to the free queue */
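        /*
         * All 'n' descriptors retrieved above are recycled here, including
         * those for packets dropped by the mbuf allocation failure or MAC
         * filter paths, so no host buffers are leaked.
         */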
1524         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1525
1526         return count;
1527 }
1528
1529
1530 static uint16_t
1531 avp_recv_pkts(void *rx_queue,
1532               struct rte_mbuf **rx_pkts,
1533               uint16_t nb_pkts)
1534 {
1535         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1536         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1537         struct avp_dev *avp = rxq->avp;
1538         struct rte_avp_desc *pkt_buf;
1539         struct rte_avp_fifo *free_q;
1540         struct rte_avp_fifo *rx_q;
1541         unsigned int count, avail, n;
1542         unsigned int pkt_len;
1543         struct rte_mbuf *m;
1544         char *pkt_data;
1545         unsigned int i;
1546
1547         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1548                 /* VM live migration in progress */
1549                 return 0;
1550         }
1551
1552         rx_q = avp->rx_q[rxq->queue_id];
1553         free_q = avp->free_q[rxq->queue_id];
1554
1555         /* setup next queue to service */
1556         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1557                 (rxq->queue_id + 1) : rxq->queue_base;
1558
1559         /* determine how many slots are available in the free queue */
1560         count = avp_fifo_free_count(free_q);
1561
1562         /* determine how many packets are available in the rx queue */
1563         avail = avp_fifo_count(rx_q);
1564
1565         /* determine how many packets can be received */
1566         count = RTE_MIN(count, avail);
1567         count = RTE_MIN(count, nb_pkts);
1568         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1569
1570         if (unlikely(count == 0)) {
1571                 /* no free buffers, or no buffers on the rx queue */
1572                 return 0;
1573         }
1574
1575         /* retrieve pending packets */
1576         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1577         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1578                    count, rx_q);
1579
1580         count = 0;
1581         for (i = 0; i < n; i++) {
1582                 /* prefetch next entry while processing current one */
1583                 if (i < n - 1) {
1584                         pkt_buf = avp_dev_translate_buffer(avp,
1585                                                            avp_bufs[i + 1]);
1586                         rte_prefetch0(pkt_buf);
1587                 }
1588
1589                 /* Adjust host pointers for guest addressing */
1590                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1591                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1592                 pkt_len = pkt_buf->pkt_len;
1593
1594                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1595                              (pkt_buf->nb_segs > 1))) {
1596                         /*
1597                          * application should be using the scattered receive
1598                          * function
1599                          */
1600                         rxq->errors++;
1601                         continue;
1602                 }
1603
1604                 /* allocate a new mbuf for the received packet */
1605                 m = rte_pktmbuf_alloc(avp->pool);
1606                 if (unlikely(m == NULL)) {
1607                         rxq->dev_data->rx_mbuf_alloc_failed++;
1608                         continue;
1609                 }
1610
1611                 /* copy data out of the host buffer to our buffer */
1612                 m->data_off = RTE_PKTMBUF_HEADROOM;
1613                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1614
1615                 /* initialize the local mbuf */
1616                 rte_pktmbuf_data_len(m) = pkt_len;
1617                 rte_pktmbuf_pkt_len(m) = pkt_len;
1618                 m->port = avp->port_id;
1619
1620                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1621                         m->ol_flags = PKT_RX_VLAN;
1622                         m->vlan_tci = pkt_buf->vlan_tci;
1623                 }
1624
1625                 if (_avp_mac_filter(avp, m) != 0) {
1626                         /* silently discard packets not destined to our MAC */
1627                         rte_pktmbuf_free(m);
1628                         continue;
1629                 }
1630
1631                 /* return new mbuf to caller */
1632                 rx_pkts[count++] = m;
1633                 rxq->bytes += pkt_len;
1634         }
1635
1636         rxq->packets += count;
1637
1638         /* return the buffers to the free queue */
1639         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1640
1641         return count;
1642 }
1643
1644 /*
1645  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1646  * there are sufficient destination buffers to contain the entire source
1647  * packet.
1648  */
1649 static inline uint16_t
1650 avp_dev_copy_to_buffers(struct avp_dev *avp,
1651                         struct rte_mbuf *mbuf,
1652                         struct rte_avp_desc **buffers,
1653                         unsigned int count)
1654 {
1655         struct rte_avp_desc *previous_buf = NULL;
1656         struct rte_avp_desc *first_buf = NULL;
1657         struct rte_avp_desc *pkt_buf;
1658         struct rte_avp_desc *buf;
1659         size_t total_length;
1660         struct rte_mbuf *m;
1661         size_t copy_length;
1662         size_t src_offset;
1663         char *pkt_data;
1664         unsigned int i;
1665
1666         __rte_mbuf_sanity_check(mbuf, 1);
1667
1668         m = mbuf;
1669         src_offset = 0;
1670         total_length = rte_pktmbuf_pkt_len(m);
1671         for (i = 0; (i < count) && (m != NULL); i++) {
1672                 /* fill each destination buffer */
1673                 buf = buffers[i];
1674
1675                 if (i < count - 1) {
1676                         /* prefetch next entry while processing this one */
1677                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1678                         rte_prefetch0(pkt_buf);
1679                 }
1680
1681                 /* Adjust pointers for guest addressing */
1682                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1683                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1684
1685                 /* setup the buffer chain */
1686                 if (previous_buf != NULL)
1687                         previous_buf->next = buf;
1688                 else
1689                         first_buf = pkt_buf;
1690
1691                 previous_buf = pkt_buf;
1692
1693                 do {
1694                         /*
1695                          * copy as many source mbuf segments as will fit in the
1696                          * destination buffer.
1697                          */
1698                         copy_length = RTE_MIN((avp->host_mbuf_size -
1699                                                pkt_buf->data_len),
1700                                               (rte_pktmbuf_data_len(m) -
1701                                                src_offset));
1702                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1703                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1704                                                src_offset),
1705                                    copy_length);
1706                         pkt_buf->data_len += copy_length;
1707                         src_offset += copy_length;
1708
1709                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1710                                 /* need a new source buffer */
1711                                 m = m->next;
1712                                 src_offset = 0;
1713                         }
1714
1715                         if (unlikely(pkt_buf->data_len ==
1716                                      avp->host_mbuf_size)) {
1717                                 /* need a new destination buffer */
1718                                 break;
1719                         }
1720
1721                 } while (m != NULL);
1722         }
1723
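        /* record the aggregate packet metadata in the first host descriptor */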
1724         first_buf->nb_segs = count;
1725         first_buf->pkt_len = total_length;
1726
1727         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1728                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1729                 first_buf->vlan_tci = mbuf->vlan_tci;
1730         }
1731
1732         avp_dev_buffer_sanity_check(avp, buffers[0]);
1733
1734         return total_length;
1735 }
1736
1737
1738 static uint16_t
1739 avp_xmit_scattered_pkts(void *tx_queue,
1740                         struct rte_mbuf **tx_pkts,
1741                         uint16_t nb_pkts)
1742 {
1743         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1744                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1745         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1746         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1747         struct avp_dev *avp = txq->avp;
1748         struct rte_avp_fifo *alloc_q;
1749         struct rte_avp_fifo *tx_q;
1750         unsigned int count, avail, n;
1751         unsigned int orig_nb_pkts;
1752         struct rte_mbuf *m;
1753         unsigned int required;
1754         unsigned int segments;
1755         unsigned int tx_bytes;
1756         unsigned int i;
1757
1758         orig_nb_pkts = nb_pkts;
1759         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1760                 /* VM live migration in progress */
1761                 /* TODO ... buffer for X packets then drop? */
1762                 txq->errors += nb_pkts;
1763                 return 0;
1764         }
1765
1766         tx_q = avp->tx_q[txq->queue_id];
1767         alloc_q = avp->alloc_q[txq->queue_id];
1768
1769         /* limit the number of transmitted packets to the max burst size */
1770         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1771                 nb_pkts = AVP_MAX_TX_BURST;
1772
1773         /* determine how many buffers are available to copy into */
1774         avail = avp_fifo_count(alloc_q);
1775         if (unlikely(avail > (AVP_MAX_TX_BURST *
1776                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1777                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1778
1779         /* determine how many slots are available in the transmit queue */
1780         count = avp_fifo_free_count(tx_q);
1781
1782         /* determine how many packets can be sent */
1783         nb_pkts = RTE_MIN(count, nb_pkts);
1784
1785         /* determine how many packets will fit in the available buffers */
1786         count = 0;
1787         segments = 0;
1788         for (i = 0; i < nb_pkts; i++) {
1789                 m = tx_pkts[i];
1790                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1791                         /* prefetch next entry while processing this one */
1792                         rte_prefetch0(tx_pkts[i + 1]);
1793                 }
1794                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1795                         avp->host_mbuf_size;
1796
1797                 if (unlikely((required == 0) ||
1798                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1799                         break;
1800                 else if (unlikely(required + segments > avail))
1801                         break;
1802                 segments += required;
1803                 count++;
1804         }
1805         nb_pkts = count;
1806
1807         if (unlikely(nb_pkts == 0)) {
1808                 /* no available buffers, or no space on the tx queue */
1809                 txq->errors += orig_nb_pkts;
1810                 return 0;
1811         }
1812
1813         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1814                    nb_pkts, tx_q);
1815
1816         /* retrieve sufficient send buffers */
1817         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1818         if (unlikely(n != segments)) {
1819                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1820                            "n=%u, segments=%u, orig=%u\n",
1821                            n, segments, orig_nb_pkts);
1822                 txq->errors += orig_nb_pkts;
1823                 return 0;
1824         }
1825
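        /*
         * Copy each accepted packet into its reserved host buffers; 'count'
         * tracks the index of the next unused descriptor in avp_bufs.
         */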
1826         tx_bytes = 0;
1827         count = 0;
1828         for (i = 0; i < nb_pkts; i++) {
1829                 /* process each packet to be transmitted */
1830                 m = tx_pkts[i];
1831
1832                 /* determine how many buffers are required for this packet */
1833                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1834                         avp->host_mbuf_size;
1835
1836                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1837                                                     &avp_bufs[count], required);
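                /* remember the head descriptor of each packet for the transmit queue */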
1838                 tx_bufs[i] = avp_bufs[count];
1839                 count += required;
1840
1841                 /* free the original mbuf */
1842                 rte_pktmbuf_free(m);
1843         }
1844
1845         txq->packets += nb_pkts;
1846         txq->bytes += tx_bytes;
1847
1848 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1849         for (i = 0; i < nb_pkts; i++)
1850                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1851 #endif
1852
1853         /* send the packets */
1854         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1855         if (unlikely(n != orig_nb_pkts))
1856                 txq->errors += (orig_nb_pkts - n);
1857
1858         return n;
1859 }
1860
1861
1862 static uint16_t
1863 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1864 {
1865         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1866         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1867         struct avp_dev *avp = txq->avp;
1868         struct rte_avp_desc *pkt_buf;
1869         struct rte_avp_fifo *alloc_q;
1870         struct rte_avp_fifo *tx_q;
1871         unsigned int count, avail, n;
1872         struct rte_mbuf *m;
1873         unsigned int pkt_len;
1874         unsigned int tx_bytes;
1875         char *pkt_data;
1876         unsigned int i;
1877
1878         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1879                 /* VM live migration in progress */
1880                 /* TODO ... buffer for X packets then drop?! */
1881                 txq->errors++;
1882                 return 0;
1883         }
1884
1885         tx_q = avp->tx_q[txq->queue_id];
1886         alloc_q = avp->alloc_q[txq->queue_id];
1887
1888         /* limit the number of transmitted packets to the max burst size */
1889         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1890                 nb_pkts = AVP_MAX_TX_BURST;
1891
1892         /* determine how many buffers are available to copy into */
1893         avail = avp_fifo_count(alloc_q);
1894
1895         /* determine how many slots are available in the transmit queue */
1896         count = avp_fifo_free_count(tx_q);
1897
1898         /* determine how many packets can be sent */
1899         count = RTE_MIN(count, avail);
1900         count = RTE_MIN(count, nb_pkts);
1901
1902         if (unlikely(count == 0)) {
1903                 /* no available buffers, or no space on the tx queue */
1904                 txq->errors += nb_pkts;
1905                 return 0;
1906         }
1907
1908         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1909                    count, tx_q);
1910
1911         /* retrieve sufficient send buffers */
1912         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1913         if (unlikely(n != count)) {
1914                 txq->errors++;
1915                 return 0;
1916         }
1917
1918         tx_bytes = 0;
1919         for (i = 0; i < count; i++) {
1920                 /* prefetch next entry while processing the current one */
1921                 if (i < count - 1) {
1922                         pkt_buf = avp_dev_translate_buffer(avp,
1923                                                            avp_bufs[i + 1]);
1924                         rte_prefetch0(pkt_buf);
1925                 }
1926
1927                 /* process each packet to be transmitted */
1928                 m = tx_pkts[i];
1929
1930                 /* Adjust pointers for guest addressing */
1931                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1932                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1933                 pkt_len = rte_pktmbuf_pkt_len(m);
1934
1935                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1936                                          (pkt_len > avp->host_mbuf_size))) {
1937                         /*
1938                          * application should be using the scattered transmit
1939                          * function; send it truncated to avoid the performance
1940                          * hit of having to manage returning the already
1941                          * allocated buffer to the free list.  This should not
1942                          * happen since the application should have set the
1943                          * max_rx_pkt_len based on its MTU and it should be
1944                          * policing its own packet sizes.
1945                          */
1946                         txq->errors++;
1947                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1948                                           avp->host_mbuf_size);
1949                 }
1950
1951                 /* copy data out of our mbuf and into the AVP buffer */
1952                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1953                 pkt_buf->pkt_len = pkt_len;
1954                 pkt_buf->data_len = pkt_len;
1955                 pkt_buf->nb_segs = 1;
1956                 pkt_buf->next = NULL;
1957
1958                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1959                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1960                         pkt_buf->vlan_tci = m->vlan_tci;
1961                 }
1962
1963                 tx_bytes += pkt_len;
1964
1965                 /* free the original mbuf */
1966                 rte_pktmbuf_free(m);
1967         }
1968
1969         txq->packets += count;
1970         txq->bytes += tx_bytes;
1971
1972         /* send the packets */
1973         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1974
1975         return n;
1976 }
1977
1978 static void
1979 avp_dev_rx_queue_release(void *rx_queue)
1980 {
1981         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1982         struct avp_dev *avp = rxq->avp;
1983         struct rte_eth_dev_data *data = avp->dev_data;
1984         unsigned int i;
1985
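        /* clear all references to this queue from the device's Rx queue table */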
1986         for (i = 0; i < avp->num_rx_queues; i++) {
1987                 if (data->rx_queues[i] == rxq)
1988                         data->rx_queues[i] = NULL;
1989         }
1990 }
1991
1992 static void
1993 avp_dev_tx_queue_release(void *tx_queue)
1994 {
1995         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1996         struct avp_dev *avp = txq->avp;
1997         struct rte_eth_dev_data *data = avp->dev_data;
1998         unsigned int i;
1999
2000         for (i = 0; i < avp->num_tx_queues; i++) {
2001                 if (data->tx_queues[i] == txq)
2002                         data->tx_queues[i] = NULL;
2003         }
2004 }
2005
2006 static int
2007 avp_dev_configure(struct rte_eth_dev *eth_dev)
2008 {
2009         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2010         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2011         struct rte_avp_device_info *host_info;
2012         struct rte_avp_device_config config;
2013         int mask = 0;
2014         void *addr;
2015         int ret;
2016
2017         rte_spinlock_lock(&avp->lock);
2018         if (avp->flags & AVP_F_DETACHED) {
2019                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2020                 ret = -ENOTSUP;
2021                 goto unlock;
2022         }
2023
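        /* the device info structure shared by the host is mapped through the AVP device BAR */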
2024         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2025         host_info = (struct rte_avp_device_info *)addr;
2026
2027         /* Setup required number of queues */
2028         _avp_set_queue_counts(eth_dev);
2029
2030         mask = (ETH_VLAN_STRIP_MASK |
2031                 ETH_VLAN_FILTER_MASK |
2032                 ETH_VLAN_EXTEND_MASK);
2033         ret = avp_vlan_offload_set(eth_dev, mask);
2034         if (ret < 0) {
2035                 PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2036                             ret);
2037                 goto unlock;
2038         }
2039
2040         /* update device config */
2041         memset(&config, 0, sizeof(config));
2042         config.device_id = host_info->device_id;
2043         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2044         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2045         config.features = avp->features;
2046         config.num_tx_queues = avp->num_tx_queues;
2047         config.num_rx_queues = avp->num_rx_queues;
2048
2049         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2050         if (ret < 0) {
2051                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2052                             ret);
2053                 goto unlock;
2054         }
2055
2056         avp->flags |= AVP_F_CONFIGURED;
2057         ret = 0;
2058
2059 unlock:
2060         rte_spinlock_unlock(&avp->lock);
2061         return ret;
2062 }
2063
2064 static int
2065 avp_dev_start(struct rte_eth_dev *eth_dev)
2066 {
2067         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2068         int ret;
2069
2070         rte_spinlock_lock(&avp->lock);
2071         if (avp->flags & AVP_F_DETACHED) {
2072                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2073                 ret = -ENOTSUP;
2074                 goto unlock;
2075         }
2076
2077         /* disable features that we do not support */
2078         eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2079         eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2080         eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2081         eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2082
2083         /* update link state */
2084         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2085         if (ret < 0) {
2086                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2087                             ret);
2088                 goto unlock;
2089         }
2090
2091         /* remember current link state */
2092         avp->flags |= AVP_F_LINKUP;
2093
2094         ret = 0;
2095
2096 unlock:
2097         rte_spinlock_unlock(&avp->lock);
2098         return ret;
2099 }
2100
2101 static void
2102 avp_dev_stop(struct rte_eth_dev *eth_dev)
2103 {
2104         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2105         int ret;
2106
2107         rte_spinlock_lock(&avp->lock);
2108         if (avp->flags & AVP_F_DETACHED) {
2109                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2110                 goto unlock;
2111         }
2112
2113         /* remember current link state */
2114         avp->flags &= ~AVP_F_LINKUP;
2115
2116         /* update link state */
2117         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2118         if (ret < 0) {
2119                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2120                             ret);
2121         }
2122
2123 unlock:
2124         rte_spinlock_unlock(&avp->lock);
2125 }
2126
2127 static void
2128 avp_dev_close(struct rte_eth_dev *eth_dev)
2129 {
2130         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2131         int ret;
2132
2133         rte_spinlock_lock(&avp->lock);
2134         if (avp->flags & AVP_F_DETACHED) {
2135                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2136                 goto unlock;
2137         }
2138
2139         /* remember current link state */
2140         avp->flags &= ~AVP_F_LINKUP;
2141         avp->flags &= ~AVP_F_CONFIGURED;
2142
2143         ret = avp_dev_disable_interrupts(eth_dev);
2144         if (ret < 0) {
2145                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2146                 /* continue */
2147         }
2148
2149         /* update device state */
2150         ret = avp_dev_ctrl_shutdown(eth_dev);
2151         if (ret < 0) {
2152                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2153                             ret);
2154                 /* continue */
2155         }
2156
2157 unlock:
2158         rte_spinlock_unlock(&avp->lock);
2159 }
2160
2161 static int
2162 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2163                                         __rte_unused int wait_to_complete)
2164 {
2165         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2166         struct rte_eth_link *link = &eth_dev->data->dev_link;
2167
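        /*
         * Link state is tracked in software via AVP_F_LINKUP; speed and
         * duplex are reported as fixed values.
         */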
2168         link->link_speed = ETH_SPEED_NUM_10G;
2169         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2170         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2171
2172         return -1;
2173 }
2174
2175 static void
2176 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2177 {
2178         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2179
2180         rte_spinlock_lock(&avp->lock);
2181         if ((avp->flags & AVP_F_PROMISC) == 0) {
2182                 avp->flags |= AVP_F_PROMISC;
2183                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2184                             eth_dev->data->port_id);
2185         }
2186         rte_spinlock_unlock(&avp->lock);
2187 }
2188
2189 static void
2190 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2191 {
2192         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2193
2194         rte_spinlock_lock(&avp->lock);
2195         if ((avp->flags & AVP_F_PROMISC) != 0) {
2196                 avp->flags &= ~AVP_F_PROMISC;
2197                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2198                             eth_dev->data->port_id);
2199         }
2200         rte_spinlock_unlock(&avp->lock);
2201 }
2202
2203 static void
2204 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2205                  struct rte_eth_dev_info *dev_info)
2206 {
2207         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2208
2209         dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2210         dev_info->max_rx_queues = avp->max_rx_queues;
2211         dev_info->max_tx_queues = avp->max_tx_queues;
2212         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2213         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2214         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2215         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2216                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2217                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2218         }
2219 }
2220
2221 static int
2222 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2223 {
2224         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2225
2226         if (mask & ETH_VLAN_STRIP_MASK) {
2227                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2228                         if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2229                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2230                         else
2231                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2232                 } else {
2233                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2234                 }
2235         }
2236
2237         if (mask & ETH_VLAN_FILTER_MASK) {
2238                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2239                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2240         }
2241
2242         if (mask & ETH_VLAN_EXTEND_MASK) {
2243                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2244                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2245         }
2246
2247         return 0;
2248 }
2249
2250 static int
2251 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2252 {
2253         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2254         unsigned int i;
2255
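        /* statistics are maintained in software per queue; aggregate them here */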
2256         for (i = 0; i < avp->num_rx_queues; i++) {
2257                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2258
2259                 if (rxq) {
2260                         stats->ipackets += rxq->packets;
2261                         stats->ibytes += rxq->bytes;
2262                         stats->ierrors += rxq->errors;
2263
2264                         stats->q_ipackets[i] += rxq->packets;
2265                         stats->q_ibytes[i] += rxq->bytes;
2266                         stats->q_errors[i] += rxq->errors;
2267                 }
2268         }
2269
2270         for (i = 0; i < avp->num_tx_queues; i++) {
2271                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2272
2273                 if (txq) {
2274                         stats->opackets += txq->packets;
2275                         stats->obytes += txq->bytes;
2276                         stats->oerrors += txq->errors;
2277
2278                         stats->q_opackets[i] += txq->packets;
2279                         stats->q_obytes[i] += txq->bytes;
2280                         stats->q_errors[i] += txq->errors;
2281                 }
2282         }
2283
2284         return 0;
2285 }
2286
2287 static void
2288 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2289 {
2290         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2291         unsigned int i;
2292
2293         for (i = 0; i < avp->num_rx_queues; i++) {
2294                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2295
2296                 if (rxq) {
2297                         rxq->bytes = 0;
2298                         rxq->packets = 0;
2299                         rxq->errors = 0;
2300                 }
2301         }
2302
2303         for (i = 0; i < avp->num_tx_queues; i++) {
2304                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2305
2306                 if (txq) {
2307                         txq->bytes = 0;
2308                         txq->packets = 0;
2309                         txq->errors = 0;
2310                 }
2311         }
2312 }
2313
2314 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2315 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);