ethdev: allow drivers to return error on close
[dpdk.git] / drivers / net / avp / avp_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2013-2017 Wind River Systems, Inc.
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <rte_ethdev_driver.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_malloc.h>
16 #include <rte_atomic.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
21 #include <rte_common.h>
22 #include <rte_cycles.h>
23 #include <rte_spinlock.h>
24 #include <rte_byteorder.h>
25 #include <rte_dev.h>
26 #include <rte_memory.h>
27 #include <rte_eal.h>
28 #include <rte_io.h>
29
30 #include "rte_avp_common.h"
31 #include "rte_avp_fifo.h"
32
33 #include "avp_logs.h"
34
/* Device (re)creation helper; declared early because avp_dev_attach() uses it */
static int
avp_dev_create(struct rte_pci_device *pci_dev, struct rte_eth_dev *eth_dev);

/* Control-path callbacks installed in avp_eth_dev_ops */
static int avp_dev_configure(struct rte_eth_dev *dev);
static int avp_dev_start(struct rte_eth_dev *dev);
static void avp_dev_stop(struct rte_eth_dev *dev);
static int avp_dev_close(struct rte_eth_dev *dev);
static int
avp_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info);
static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static int avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int avp_dev_promiscuous_disable(struct rte_eth_dev *dev);

/* Queue setup callbacks */
static int
avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
		       uint16_t rx_queue_id,
		       uint16_t nb_rx_desc,
		       unsigned int socket_id,
		       const struct rte_eth_rxconf *rx_conf,
		       struct rte_mempool *pool);

static int
avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
		       uint16_t tx_queue_id,
		       uint16_t nb_tx_desc,
		       unsigned int socket_id,
		       const struct rte_eth_txconf *tx_conf);

/* Receive burst handlers */
static uint16_t
avp_recv_scattered_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts);

static uint16_t
avp_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);

/* Transmit burst handlers */
static uint16_t
avp_xmit_scattered_pkts(void *tx_queue,
			struct rte_mbuf **tx_pkts,
			uint16_t nb_pkts);

static uint16_t
avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

/* Queue release callbacks */
static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);

/* Statistics callbacks */
static int
avp_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
static int avp_dev_stats_reset(struct rte_eth_dev *dev);
84
85
/* Driver limits */
#define AVP_MAX_RX_BURST	64
#define AVP_MAX_TX_BURST	64
#define AVP_MAX_MAC_ADDRS	1
#define AVP_MIN_RX_BUFSIZE	RTE_ETHER_MIN_LEN


/*
 * Number of microseconds to sleep between successive checks of the
 * response queue while a request is outstanding.
 */
#define AVP_REQUEST_DELAY_USECS	(5000)

/*
 * Number of times the response queue is checked for completion before the
 * request is declared to have timed out.
 */
#define AVP_MAX_REQUEST_RETRY	(100)

/* Current PCI driver version number */
#define AVP_DPDK_DRIVER_VERSION	RTE_AVP_CURRENT_GUEST_VERSION
106
107 /*
108  * The set of PCI devices this driver supports
109  */
110 static const struct rte_pci_id pci_id_avp_map[] = {
111         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
112           .device_id = RTE_AVP_PCI_DEVICE_ID,
113           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
114           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
115           .class_id = RTE_CLASS_ANY_ID,
116         },
117
118         { .vendor_id = 0, /* sentinel */
119         },
120 };
121
122 /*
123  * dev_ops for avp, bare necessities for basic operation
124  */
125 static const struct eth_dev_ops avp_eth_dev_ops = {
126         .dev_configure       = avp_dev_configure,
127         .dev_start           = avp_dev_start,
128         .dev_stop            = avp_dev_stop,
129         .dev_close           = avp_dev_close,
130         .dev_infos_get       = avp_dev_info_get,
131         .vlan_offload_set    = avp_vlan_offload_set,
132         .stats_get           = avp_dev_stats_get,
133         .stats_reset         = avp_dev_stats_reset,
134         .link_update         = avp_dev_link_update,
135         .promiscuous_enable  = avp_dev_promiscuous_enable,
136         .promiscuous_disable = avp_dev_promiscuous_disable,
137         .rx_queue_setup      = avp_dev_rx_queue_setup,
138         .rx_queue_release    = avp_dev_rx_queue_release,
139         .tx_queue_setup      = avp_dev_tx_queue_setup,
140         .tx_queue_release    = avp_dev_tx_queue_release,
141 };
142
/**@{ Bit flags stored in avp_dev.flags */
#define AVP_F_PROMISC		(1 << 1)
#define AVP_F_CONFIGURED	(1 << 2)
#define AVP_F_LINKUP		(1 << 3)
#define AVP_F_DETACHED		(1 << 4)
/**@} */

/* Marker value used to validate an AVP ethernet device */
#define AVP_ETHDEV_MAGIC	0x92972862
152
153 /*
154  * Defines the AVP device attributes which are attached to an RTE ethernet
155  * device
156  */
157 struct avp_dev {
158         uint32_t magic; /**< Memory validation marker */
159         uint64_t device_id; /**< Unique system identifier */
160         struct rte_ether_addr ethaddr; /**< Host specified MAC address */
161         struct rte_eth_dev_data *dev_data;
162         /**< Back pointer to ethernet device data */
163         volatile uint32_t flags; /**< Device operational flags */
164         uint16_t port_id; /**< Ethernet port identifier */
165         struct rte_mempool *pool; /**< pkt mbuf mempool */
166         unsigned int guest_mbuf_size; /**< local pool mbuf size */
167         unsigned int host_mbuf_size; /**< host mbuf size */
168         unsigned int max_rx_pkt_len; /**< maximum receive unit */
169         uint32_t host_features; /**< Supported feature bitmap */
170         uint32_t features; /**< Enabled feature bitmap */
171         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
172         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
173         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
174         unsigned int max_rx_queues; /**< Maximum number of receive queues */
175
176         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
177         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
178         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
179         /**< Allocated mbufs queue */
180         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
181         /**< To be freed mbufs queue */
182
183         /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
184         rte_spinlock_t lock;
185
186         /* For request & response */
187         struct rte_avp_fifo *req_q; /**< Request queue */
188         struct rte_avp_fifo *resp_q; /**< Response queue */
189         void *host_sync_addr; /**< (host) Req/Resp Mem address */
190         void *sync_addr; /**< Req/Resp Mem address */
191         void *host_mbuf_addr; /**< (host) MBUF pool start address */
192         void *mbuf_addr; /**< MBUF pool start address */
193 } __rte_cache_aligned;
194
195 /* RTE ethernet private data */
196 struct avp_adapter {
197         struct avp_dev avp;
198 } __rte_cache_aligned;
199
200
/* Write a 32-bit value to an MMIO register (relaxed ordering accessor) */
#define AVP_WRITE32(_value, _addr) \
	rte_write32_relaxed((_value), (_addr))

/* Read a 32-bit value from an MMIO register (relaxed ordering accessor) */
#define AVP_READ32(_addr) \
	rte_read32_relaxed((_addr))
206
/*
 * Convert the ethernet device private data pointer into a pointer to the
 * embedded AVP device object.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument (for example "base + offset") is evaluated as a whole and only
 * then reinterpreted as a struct avp_adapter pointer.
 */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
	(&((struct avp_adapter *)(adapter))->avp)
210
/*
 * Per-queue context handed to the receive and transmit burst callback
 * functions.
 */
struct avp_queue {
	/** Back pointer to the ethernet device data */
	struct rte_eth_dev_data *dev_data;
	/** Back pointer to the AVP device */
	struct avp_dev *avp;
	/** Queue identifier used for indexing the current queue */
	uint16_t queue_id;
	/** Base queue identifier for queue servicing */
	uint16_t queue_base;
	/** Maximum queue identifier for queue servicing */
	uint16_t queue_limit;

	/* Counters */
	uint64_t packets;
	uint64_t bytes;
	uint64_t errors;
};
230
231 /* send a request and wait for a response
232  *
233  * @warning must be called while holding the avp->lock spinlock.
234  */
235 static int
236 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
237 {
238         unsigned int retry = AVP_MAX_REQUEST_RETRY;
239         void *resp_addr = NULL;
240         unsigned int count;
241         int ret;
242
243         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
244
245         request->result = -ENOTSUP;
246
247         /* Discard any stale responses before starting a new request */
248         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
249                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
250
251         rte_memcpy(avp->sync_addr, request, sizeof(*request));
252         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
253         if (count < 1) {
254                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
255                             request->req_id);
256                 ret = -EBUSY;
257                 goto done;
258         }
259
260         while (retry--) {
261                 /* wait for a response */
262                 usleep(AVP_REQUEST_DELAY_USECS);
263
264                 count = avp_fifo_count(avp->resp_q);
265                 if (count >= 1) {
266                         /* response received */
267                         break;
268                 }
269
270                 if ((count < 1) && (retry == 0)) {
271                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
272                                     request->req_id);
273                         ret = -ETIME;
274                         goto done;
275                 }
276         }
277
278         /* retrieve the response */
279         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
280         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
281                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
282                             count, resp_addr, avp->host_sync_addr);
283                 ret = -ENODATA;
284                 goto done;
285         }
286
287         /* copy to user buffer */
288         rte_memcpy(request, avp->sync_addr, sizeof(*request));
289         ret = 0;
290
291         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
292                     request->result, request->req_id);
293
294 done:
295         return ret;
296 }
297
298 static int
299 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
300 {
301         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
302         struct rte_avp_request request;
303         int ret;
304
305         /* setup a link state change request */
306         memset(&request, 0, sizeof(request));
307         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
308         request.if_up = state;
309
310         ret = avp_dev_process_request(avp, &request);
311
312         return ret == 0 ? request.result : ret;
313 }
314
315 static int
316 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
317                         struct rte_avp_device_config *config)
318 {
319         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
320         struct rte_avp_request request;
321         int ret;
322
323         /* setup a configure request */
324         memset(&request, 0, sizeof(request));
325         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
326         memcpy(&request.config, config, sizeof(request.config));
327
328         ret = avp_dev_process_request(avp, &request);
329
330         return ret == 0 ? request.result : ret;
331 }
332
333 static int
334 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
335 {
336         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
337         struct rte_avp_request request;
338         int ret;
339
340         /* setup a shutdown request */
341         memset(&request, 0, sizeof(request));
342         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
343
344         ret = avp_dev_process_request(avp, &request);
345
346         return ret == 0 ? request.result : ret;
347 }
348
349 /* translate from host mbuf virtual address to guest virtual address */
350 static inline void *
351 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
352 {
353         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
354                                        (uintptr_t)avp->host_mbuf_addr),
355                            (uintptr_t)avp->mbuf_addr);
356 }
357
358 /* translate from host physical address to guest virtual address */
359 static void *
360 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
361                           rte_iova_t host_phys_addr)
362 {
363         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
364         struct rte_mem_resource *resource;
365         struct rte_avp_memmap_info *info;
366         struct rte_avp_memmap *map;
367         off_t offset;
368         void *addr;
369         unsigned int i;
370
371         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
372         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
373         info = (struct rte_avp_memmap_info *)resource->addr;
374
375         offset = 0;
376         for (i = 0; i < info->nb_maps; i++) {
377                 /* search all segments looking for a matching address */
378                 map = &info->maps[i];
379
380                 if ((host_phys_addr >= map->phys_addr) &&
381                         (host_phys_addr < (map->phys_addr + map->length))) {
382                         /* address is within this segment */
383                         offset += (host_phys_addr - map->phys_addr);
384                         addr = RTE_PTR_ADD(addr, (uintptr_t)offset);
385
386                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
387                                     host_phys_addr, addr);
388
389                         return addr;
390                 }
391                 offset += map->length;
392         }
393
394         return NULL;
395 }
396
397 /* verify that the incoming device version is compatible with our version */
398 static int
399 avp_dev_version_check(uint32_t version)
400 {
401         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
402         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
403
404         if (device <= driver) {
405                 /* the host driver version is less than or equal to ours */
406                 return 0;
407         }
408
409         return 1;
410 }
411
412 /* verify that memory regions have expected version and validation markers */
413 static int
414 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
415 {
416         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
417         struct rte_avp_memmap_info *memmap;
418         struct rte_avp_device_info *info;
419         struct rte_mem_resource *resource;
420         unsigned int i;
421
422         /* Dump resource info for debug */
423         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
424                 resource = &pci_dev->mem_resource[i];
425                 if ((resource->phys_addr == 0) || (resource->len == 0))
426                         continue;
427
428                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
429                             i, resource->phys_addr,
430                             resource->len, resource->addr);
431
432                 switch (i) {
433                 case RTE_AVP_PCI_MEMMAP_BAR:
434                         memmap = (struct rte_avp_memmap_info *)resource->addr;
435                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
436                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
437                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
438                                             memmap->magic, memmap->version);
439                                 return -EINVAL;
440                         }
441                         break;
442
443                 case RTE_AVP_PCI_DEVICE_BAR:
444                         info = (struct rte_avp_device_info *)resource->addr;
445                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
446                             avp_dev_version_check(info->version)) {
447                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
448                                             info->magic, info->version,
449                                             AVP_DPDK_DRIVER_VERSION);
450                                 return -EINVAL;
451                         }
452                         break;
453
454                 case RTE_AVP_PCI_MEMORY_BAR:
455                 case RTE_AVP_PCI_MMIO_BAR:
456                         if (resource->addr == NULL) {
457                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
458                                             i);
459                                 return -EINVAL;
460                         }
461                         break;
462
463                 case RTE_AVP_PCI_MSIX_BAR:
464                 default:
465                         /* no validation required */
466                         break;
467                 }
468         }
469
470         return 0;
471 }
472
473 static int
474 avp_dev_detach(struct rte_eth_dev *eth_dev)
475 {
476         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
477         int ret;
478
479         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
480                     eth_dev->data->port_id, avp->device_id);
481
482         rte_spinlock_lock(&avp->lock);
483
484         if (avp->flags & AVP_F_DETACHED) {
485                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
486                             eth_dev->data->port_id);
487                 ret = 0;
488                 goto unlock;
489         }
490
491         /* shutdown the device first so the host stops sending us packets. */
492         ret = avp_dev_ctrl_shutdown(eth_dev);
493         if (ret < 0) {
494                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
495                             ret);
496                 avp->flags &= ~AVP_F_DETACHED;
497                 goto unlock;
498         }
499
500         avp->flags |= AVP_F_DETACHED;
501         rte_wmb();
502
503         /* wait for queues to acknowledge the presence of the detach flag */
504         rte_delay_ms(1);
505
506         ret = 0;
507
508 unlock:
509         rte_spinlock_unlock(&avp->lock);
510         return ret;
511 }
512
513 static void
514 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
515 {
516         struct avp_dev *avp =
517                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
518         struct avp_queue *rxq;
519         uint16_t queue_count;
520         uint16_t remainder;
521
522         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
523
524         /*
525          * Must map all AVP fifos as evenly as possible between the configured
526          * device queues.  Each device queue will service a subset of the AVP
527          * fifos. If there is an odd number of device queues the first set of
528          * device queues will get the extra AVP fifos.
529          */
530         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
531         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
532         if (rx_queue_id < remainder) {
533                 /* these queues must service one extra FIFO */
534                 rxq->queue_base = rx_queue_id * (queue_count + 1);
535                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
536         } else {
537                 /* these queues service the regular number of FIFO */
538                 rxq->queue_base = ((remainder * (queue_count + 1)) +
539                                    ((rx_queue_id - remainder) * queue_count));
540                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
541         }
542
543         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
544                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
545
546         rxq->queue_id = rxq->queue_base;
547 }
548
549 static void
550 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
551 {
552         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
553         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
554         struct rte_avp_device_info *host_info;
555         void *addr;
556
557         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
558         host_info = (struct rte_avp_device_info *)addr;
559
560         /*
561          * the transmit direction is not negotiated beyond respecting the max
562          * number of queues because the host can handle arbitrary guest tx
563          * queues (host rx queues).
564          */
565         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
566
567         /*
568          * the receive direction is more restrictive.  The host requires a
569          * minimum number of guest rx queues (host tx queues) therefore
570          * negotiate a value that is at least as large as the host minimum
571          * requirement.  If the host and guest values are not identical then a
572          * mapping will be established in the receive_queue_setup function.
573          */
574         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
575                                      eth_dev->data->nb_rx_queues);
576
577         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
578                     avp->num_tx_queues, avp->num_rx_queues);
579 }
580
581 static int
582 avp_dev_attach(struct rte_eth_dev *eth_dev)
583 {
584         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
585         struct rte_avp_device_config config;
586         unsigned int i;
587         int ret;
588
589         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
590                     eth_dev->data->port_id, avp->device_id);
591
592         rte_spinlock_lock(&avp->lock);
593
594         if (!(avp->flags & AVP_F_DETACHED)) {
595                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
596                             eth_dev->data->port_id);
597                 ret = 0;
598                 goto unlock;
599         }
600
601         /*
602          * make sure that the detached flag is set prior to reconfiguring the
603          * queues.
604          */
605         avp->flags |= AVP_F_DETACHED;
606         rte_wmb();
607
608         /*
609          * re-run the device create utility which will parse the new host info
610          * and setup the AVP device queue pointers.
611          */
612         ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
613         if (ret < 0) {
614                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
615                             ret);
616                 goto unlock;
617         }
618
619         if (avp->flags & AVP_F_CONFIGURED) {
620                 /*
621                  * Update the receive queue mapping to handle cases where the
622                  * source and destination hosts have different queue
623                  * requirements.  As long as the DETACHED flag is asserted the
624                  * queue table should not be referenced so it should be safe to
625                  * update it.
626                  */
627                 _avp_set_queue_counts(eth_dev);
628                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
629                         _avp_set_rx_queue_mappings(eth_dev, i);
630
631                 /*
632                  * Update the host with our config details so that it knows the
633                  * device is active.
634                  */
635                 memset(&config, 0, sizeof(config));
636                 config.device_id = avp->device_id;
637                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
638                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
639                 config.features = avp->features;
640                 config.num_tx_queues = avp->num_tx_queues;
641                 config.num_rx_queues = avp->num_rx_queues;
642                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
643
644                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
645                 if (ret < 0) {
646                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
647                                     ret);
648                         goto unlock;
649                 }
650         }
651
652         rte_wmb();
653         avp->flags &= ~AVP_F_DETACHED;
654
655         ret = 0;
656
657 unlock:
658         rte_spinlock_unlock(&avp->lock);
659         return ret;
660 }
661
662 static void
663 avp_dev_interrupt_handler(void *data)
664 {
665         struct rte_eth_dev *eth_dev = data;
666         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
667         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
668         uint32_t status, value;
669         int ret;
670
671         if (registers == NULL)
672                 rte_panic("no mapped MMIO register space\n");
673
674         /* read the interrupt status register
675          * note: this register clears on read so all raised interrupts must be
676          *    handled or remembered for later processing
677          */
678         status = AVP_READ32(
679                 RTE_PTR_ADD(registers,
680                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
681
682         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
683                 /* handle interrupt based on current status */
684                 value = AVP_READ32(
685                         RTE_PTR_ADD(registers,
686                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
687                 switch (value) {
688                 case RTE_AVP_MIGRATION_DETACHED:
689                         ret = avp_dev_detach(eth_dev);
690                         break;
691                 case RTE_AVP_MIGRATION_ATTACHED:
692                         ret = avp_dev_attach(eth_dev);
693                         break;
694                 default:
695                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
696                                     value);
697                         ret = -EINVAL;
698                 }
699
700                 /* acknowledge the request by writing out our current status */
701                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
702                 AVP_WRITE32(value,
703                             RTE_PTR_ADD(registers,
704                                         RTE_AVP_MIGRATION_ACK_OFFSET));
705
706                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
707         }
708
709         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
710                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
711                             status);
712
713         /* re-enable UIO interrupt handling */
714         ret = rte_intr_ack(&pci_dev->intr_handle);
715         if (ret < 0) {
716                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
717                             ret);
718                 /* continue */
719         }
720 }
721
722 static int
723 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
724 {
725         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
726         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
727         int ret;
728
729         if (registers == NULL)
730                 return -EINVAL;
731
732         /* enable UIO interrupt handling */
733         ret = rte_intr_enable(&pci_dev->intr_handle);
734         if (ret < 0) {
735                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
736                             ret);
737                 return ret;
738         }
739
740         /* inform the device that all interrupts are enabled */
741         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
742                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
743
744         return 0;
745 }
746
747 static int
748 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
749 {
750         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
751         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
752         int ret;
753
754         if (registers == NULL)
755                 return 0;
756
757         /* inform the device that all interrupts are disabled */
758         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
759                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
760
761         /* enable UIO interrupt handling */
762         ret = rte_intr_disable(&pci_dev->intr_handle);
763         if (ret < 0) {
764                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
765                             ret);
766                 return ret;
767         }
768
769         return 0;
770 }
771
772 static int
773 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
774 {
775         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
776         int ret;
777
778         /* register a callback handler with UIO for interrupt notifications */
779         ret = rte_intr_callback_register(&pci_dev->intr_handle,
780                                          avp_dev_interrupt_handler,
781                                          (void *)eth_dev);
782         if (ret < 0) {
783                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
784                             ret);
785                 return ret;
786         }
787
788         /* enable interrupt processing */
789         return avp_dev_enable_interrupts(eth_dev);
790 }
791
792 static int
793 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
794 {
795         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
796         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
797         uint32_t value;
798
799         if (registers == NULL)
800                 return 0;
801
802         value = AVP_READ32(RTE_PTR_ADD(registers,
803                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
804         if (value == RTE_AVP_MIGRATION_DETACHED) {
805                 /* migration is in progress; ack it if we have not already */
806                 AVP_WRITE32(value,
807                             RTE_PTR_ADD(registers,
808                                         RTE_AVP_MIGRATION_ACK_OFFSET));
809                 return 1;
810         }
811         return 0;
812 }
813
814 /*
815  * create a AVP device using the supplied device info by first translating it
816  * to guest address space(s).
817  */
818 static int
819 avp_dev_create(struct rte_pci_device *pci_dev,
820                struct rte_eth_dev *eth_dev)
821 {
822         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
823         struct rte_avp_device_info *host_info;
824         struct rte_mem_resource *resource;
825         unsigned int i;
826
827         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
828         if (resource->addr == NULL) {
829                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
830                             RTE_AVP_PCI_DEVICE_BAR);
831                 return -EFAULT;
832         }
833         host_info = (struct rte_avp_device_info *)resource->addr;
834
835         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
836                 avp_dev_version_check(host_info->version)) {
837                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
838                             host_info->magic, host_info->version,
839                             AVP_DPDK_DRIVER_VERSION);
840                 return -EINVAL;
841         }
842
843         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
844                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
845                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
846                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
847
848         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
849                     host_info->min_tx_queues, host_info->max_tx_queues);
850         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
851                     host_info->min_rx_queues, host_info->max_rx_queues);
852         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
853                     host_info->features);
854
855         if (avp->magic != AVP_ETHDEV_MAGIC) {
856                 /*
857                  * First time initialization (i.e., not during a VM
858                  * migration)
859                  */
860                 memset(avp, 0, sizeof(*avp));
861                 avp->magic = AVP_ETHDEV_MAGIC;
862                 avp->dev_data = eth_dev->data;
863                 avp->port_id = eth_dev->data->port_id;
864                 avp->host_mbuf_size = host_info->mbuf_size;
865                 avp->host_features = host_info->features;
866                 rte_spinlock_init(&avp->lock);
867                 memcpy(&avp->ethaddr.addr_bytes[0],
868                        host_info->ethaddr, RTE_ETHER_ADDR_LEN);
869                 /* adjust max values to not exceed our max */
870                 avp->max_tx_queues =
871                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
872                 avp->max_rx_queues =
873                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
874         } else {
875                 /* Re-attaching during migration */
876
877                 /* TODO... requires validation of host values */
878                 if ((host_info->features & avp->features) != avp->features) {
879                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
880                                     avp->features, host_info->features);
881                         /* this should not be possible; continue for now */
882                 }
883         }
884
885         /* the device id is allowed to change over migrations */
886         avp->device_id = host_info->device_id;
887
888         /* translate incoming host addresses to guest address space */
889         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
890                     host_info->tx_phys);
891         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
892                     host_info->alloc_phys);
893         for (i = 0; i < avp->max_tx_queues; i++) {
894                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
895                         host_info->tx_phys + (i * host_info->tx_size));
896
897                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
898                         host_info->alloc_phys + (i * host_info->alloc_size));
899         }
900
901         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
902                     host_info->rx_phys);
903         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
904                     host_info->free_phys);
905         for (i = 0; i < avp->max_rx_queues; i++) {
906                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
907                         host_info->rx_phys + (i * host_info->rx_size));
908                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
909                         host_info->free_phys + (i * host_info->free_size));
910         }
911
912         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
913                     host_info->req_phys);
914         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
915                     host_info->resp_phys);
916         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
917                     host_info->sync_phys);
918         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
919                     host_info->mbuf_phys);
920         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
921         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
922         avp->sync_addr =
923                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
924         avp->mbuf_addr =
925                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
926
927         /*
928          * store the host mbuf virtual address so that we can calculate
929          * relative offsets for each mbuf as they are processed
930          */
931         avp->host_mbuf_addr = host_info->mbuf_va;
932         avp->host_sync_addr = host_info->sync_va;
933
934         /*
935          * store the maximum packet length that is supported by the host.
936          */
937         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
938         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
939                                 host_info->max_rx_pkt_len);
940
941         return 0;
942 }
943
944 /*
945  * This function is based on probe() function in avp_pci.c
946  * It returns 0 on success.
947  */
948 static int
949 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
950 {
951         struct avp_dev *avp =
952                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
953         struct rte_pci_device *pci_dev;
954         int ret;
955
956         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
957         eth_dev->dev_ops = &avp_eth_dev_ops;
958         eth_dev->rx_pkt_burst = &avp_recv_pkts;
959         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
960         /* Let rte_eth_dev_close() release the port resources */
961         eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
962
963         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
964                 /*
965                  * no setup required on secondary processes.  All data is saved
966                  * in dev_private by the primary process. All resource should
967                  * be mapped to the same virtual address so all pointers should
968                  * be valid.
969                  */
970                 if (eth_dev->data->scattered_rx) {
971                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
972                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
973                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
974                 }
975                 return 0;
976         }
977
978         rte_eth_copy_pci_info(eth_dev, pci_dev);
979
980         /* Check current migration status */
981         if (avp_dev_migration_pending(eth_dev)) {
982                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
983                 return -EBUSY;
984         }
985
986         /* Check BAR resources */
987         ret = avp_dev_check_regions(eth_dev);
988         if (ret < 0) {
989                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
990                             ret);
991                 return ret;
992         }
993
994         /* Enable interrupts */
995         ret = avp_dev_setup_interrupts(eth_dev);
996         if (ret < 0) {
997                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
998                 return ret;
999         }
1000
1001         /* Handle each subtype */
1002         ret = avp_dev_create(pci_dev, eth_dev);
1003         if (ret < 0) {
1004                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1005                 return ret;
1006         }
1007
1008         /* Allocate memory for storing MAC addresses */
1009         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev",
1010                                         RTE_ETHER_ADDR_LEN, 0);
1011         if (eth_dev->data->mac_addrs == NULL) {
1012                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1013                             RTE_ETHER_ADDR_LEN);
1014                 return -ENOMEM;
1015         }
1016
1017         /* Get a mac from device config */
1018         rte_ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1019
1020         return 0;
1021 }
1022
1023 static int
1024 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1025 {
1026         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1027                 return -EPERM;
1028
1029         if (eth_dev->data == NULL)
1030                 return 0;
1031
1032         avp_dev_close(eth_dev);
1033
1034         return 0;
1035 }
1036
1037 static int
1038 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1039                   struct rte_pci_device *pci_dev)
1040 {
1041         return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct avp_adapter),
1042                         eth_avp_dev_init);
1043 }
1044
1045 static int
1046 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1047 {
1048         return rte_eth_dev_pci_generic_remove(pci_dev,
1049                                               eth_avp_dev_uninit);
1050 }
1051
1052 static struct rte_pci_driver rte_avp_pmd = {
1053         .id_table = pci_id_avp_map,
1054         .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1055         .probe = eth_avp_pci_probe,
1056         .remove = eth_avp_pci_remove,
1057 };
1058
1059 static int
1060 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1061                          struct avp_dev *avp)
1062 {
1063         unsigned int max_rx_pkt_len;
1064
1065         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1066
1067         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1068             (max_rx_pkt_len > avp->host_mbuf_size)) {
1069                 /*
1070                  * If the guest MTU is greater than either the host or guest
1071                  * buffers then chained mbufs have to be enabled in the TX
1072                  * direction.  It is assumed that the application will not need
1073                  * to send packets larger than their max_rx_pkt_len (MRU).
1074                  */
1075                 return 1;
1076         }
1077
1078         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1079             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1080                 /*
1081                  * If the host MRU is greater than its own mbuf size or the
1082                  * guest mbuf size then chained mbufs have to be enabled in the
1083                  * RX direction.
1084                  */
1085                 return 1;
1086         }
1087
1088         return 0;
1089 }
1090
1091 static int
1092 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1093                        uint16_t rx_queue_id,
1094                        uint16_t nb_rx_desc,
1095                        unsigned int socket_id,
1096                        const struct rte_eth_rxconf *rx_conf,
1097                        struct rte_mempool *pool)
1098 {
1099         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1100         struct rte_pktmbuf_pool_private *mbp_priv;
1101         struct avp_queue *rxq;
1102
1103         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1104                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1105                             rx_queue_id, eth_dev->data->nb_rx_queues);
1106                 return -EINVAL;
1107         }
1108
1109         /* Save mbuf pool pointer */
1110         avp->pool = pool;
1111
1112         /* Save the local mbuf size */
1113         mbp_priv = rte_mempool_get_priv(pool);
1114         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1115         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1116
1117         if (avp_dev_enable_scattered(eth_dev, avp)) {
1118                 if (!eth_dev->data->scattered_rx) {
1119                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1120                         eth_dev->data->scattered_rx = 1;
1121                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1122                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1123                 }
1124         }
1125
1126         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1127                     avp->max_rx_pkt_len,
1128                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1129                     avp->host_mbuf_size,
1130                     avp->guest_mbuf_size);
1131
1132         /* allocate a queue object */
1133         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1134                                  RTE_CACHE_LINE_SIZE, socket_id);
1135         if (rxq == NULL) {
1136                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1137                 return -ENOMEM;
1138         }
1139
1140         /* save back pointers to AVP and Ethernet devices */
1141         rxq->avp = avp;
1142         rxq->dev_data = eth_dev->data;
1143         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1144
1145         /* setup the queue receive mapping for the current queue. */
1146         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1147
1148         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1149
1150         (void)nb_rx_desc;
1151         (void)rx_conf;
1152         return 0;
1153 }
1154
1155 static int
1156 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1157                        uint16_t tx_queue_id,
1158                        uint16_t nb_tx_desc,
1159                        unsigned int socket_id,
1160                        const struct rte_eth_txconf *tx_conf)
1161 {
1162         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1163         struct avp_queue *txq;
1164
1165         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1166                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1167                             tx_queue_id, eth_dev->data->nb_tx_queues);
1168                 return -EINVAL;
1169         }
1170
1171         /* allocate a queue object */
1172         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1173                                  RTE_CACHE_LINE_SIZE, socket_id);
1174         if (txq == NULL) {
1175                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1176                 return -ENOMEM;
1177         }
1178
1179         /* only the configured set of transmit queues are used */
1180         txq->queue_id = tx_queue_id;
1181         txq->queue_base = tx_queue_id;
1182         txq->queue_limit = tx_queue_id;
1183
1184         /* save back pointers to AVP and Ethernet devices */
1185         txq->avp = avp;
1186         txq->dev_data = eth_dev->data;
1187         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1188
1189         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1190
1191         (void)nb_tx_desc;
1192         (void)tx_conf;
1193         return 0;
1194 }
1195
1196 static inline int
1197 _avp_cmp_ether_addr(struct rte_ether_addr *a, struct rte_ether_addr *b)
1198 {
1199         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1200         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1201         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1202 }
1203
1204 static inline int
1205 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1206 {
1207         struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1208
1209         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1210                 /* allow all packets destined to our address */
1211                 return 0;
1212         }
1213
1214         if (likely(rte_is_broadcast_ether_addr(&eth->d_addr))) {
1215                 /* allow all broadcast packets */
1216                 return 0;
1217         }
1218
1219         if (likely(rte_is_multicast_ether_addr(&eth->d_addr))) {
1220                 /* allow all multicast packets */
1221                 return 0;
1222         }
1223
1224         if (avp->flags & AVP_F_PROMISC) {
1225                 /* allow all packets when in promiscuous mode */
1226                 return 0;
1227         }
1228
1229         return -1;
1230 }
1231
#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
/*
 * Debug-only consistency check of a host buffer chain.
 *
 * Walks the chain starting at the host address buf and panics if a segment
 * translates to a NULL descriptor or data pointer, has a zero data length,
 * or if the number of segments or the summed data length does not match the
 * nb_segs / pkt_len recorded in the first descriptor.
 */
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
        struct rte_avp_desc *first_buf;
        struct rte_avp_desc *pkt_buf;
        unsigned int pkt_len;
        unsigned int nb_segs;
        void *pkt_data;
        unsigned int i;

        first_buf = avp_dev_translate_buffer(avp, buf);
        /* validate the head descriptor before reading nb_segs from it */
        if (first_buf == NULL)
                rte_panic("bad buffer: segment %u has an invalid address %p\n",
                          0U, buf);

        i = 0;
        pkt_len = 0;
        nb_segs = first_buf->nb_segs;
        do {
                /* Adjust pointers for guest addressing */
                pkt_buf = avp_dev_translate_buffer(avp, buf);
                if (pkt_buf == NULL)
                        rte_panic("bad buffer: segment %u has an invalid address %p\n",
                                  i, buf);
                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
                if (pkt_data == NULL)
                        rte_panic("bad buffer: segment %u has a NULL data pointer\n",
                                  i);
                if (pkt_buf->data_len == 0)
                        rte_panic("bad buffer: segment %u has 0 data length\n",
                                  i);
                pkt_len += pkt_buf->data_len;
                nb_segs--;
                i++;

        } while (nb_segs && (buf = pkt_buf->next) != NULL);

        /* a non-zero remainder means the chain ended early */
        if (nb_segs != 0)
                rte_panic("bad buffer: expected %u segments found %u\n",
                          first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
        if (pkt_len != first_buf->pkt_len)
                rte_panic("bad buffer: expected length %u found %u\n",
                          first_buf->pkt_len, pkt_len);
}

#define avp_dev_buffer_sanity_check(a, b) \
        __avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* buffer checking is compiled out unless RTE_LIBRTE_AVP_DEBUG_BUFFERS is set */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif
1283
1284 /*
1285  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1286  * there exactly the required number of mbufs to copy all source bytes.
1287  */
1288 static inline struct rte_mbuf *
1289 avp_dev_copy_from_buffers(struct avp_dev *avp,
1290                           struct rte_avp_desc *buf,
1291                           struct rte_mbuf **mbufs,
1292                           unsigned int count)
1293 {
1294         struct rte_mbuf *m_previous = NULL;
1295         struct rte_avp_desc *pkt_buf;
1296         unsigned int total_length = 0;
1297         unsigned int copy_length;
1298         unsigned int src_offset;
1299         struct rte_mbuf *m;
1300         uint16_t ol_flags;
1301         uint16_t vlan_tci;
1302         void *pkt_data;
1303         unsigned int i;
1304
1305         avp_dev_buffer_sanity_check(avp, buf);
1306
1307         /* setup the first source buffer */
1308         pkt_buf = avp_dev_translate_buffer(avp, buf);
1309         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1310         total_length = pkt_buf->pkt_len;
1311         src_offset = 0;
1312
1313         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1314                 ol_flags = PKT_RX_VLAN;
1315                 vlan_tci = pkt_buf->vlan_tci;
1316         } else {
1317                 ol_flags = 0;
1318                 vlan_tci = 0;
1319         }
1320
1321         for (i = 0; (i < count) && (buf != NULL); i++) {
1322                 /* fill each destination buffer */
1323                 m = mbufs[i];
1324
1325                 if (m_previous != NULL)
1326                         m_previous->next = m;
1327
1328                 m_previous = m;
1329
1330                 do {
1331                         /*
1332                          * Copy as many source buffers as will fit in the
1333                          * destination buffer.
1334                          */
1335                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1336                                                rte_pktmbuf_data_len(m)),
1337                                               (pkt_buf->data_len -
1338                                                src_offset));
1339                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1340                                                rte_pktmbuf_data_len(m)),
1341                                    RTE_PTR_ADD(pkt_data, src_offset),
1342                                    copy_length);
1343                         rte_pktmbuf_data_len(m) += copy_length;
1344                         src_offset += copy_length;
1345
1346                         if (likely(src_offset == pkt_buf->data_len)) {
1347                                 /* need a new source buffer */
1348                                 buf = pkt_buf->next;
1349                                 if (buf != NULL) {
1350                                         pkt_buf = avp_dev_translate_buffer(
1351                                                 avp, buf);
1352                                         pkt_data = avp_dev_translate_buffer(
1353                                                 avp, pkt_buf->data);
1354                                         src_offset = 0;
1355                                 }
1356                         }
1357
1358                         if (unlikely(rte_pktmbuf_data_len(m) ==
1359                                      avp->guest_mbuf_size)) {
1360                                 /* need a new destination mbuf */
1361                                 break;
1362                         }
1363
1364                 } while (buf != NULL);
1365         }
1366
1367         m = mbufs[0];
1368         m->ol_flags = ol_flags;
1369         m->nb_segs = count;
1370         rte_pktmbuf_pkt_len(m) = total_length;
1371         m->vlan_tci = vlan_tci;
1372
1373         __rte_mbuf_sanity_check(m, 1);
1374
1375         return m;
1376 }
1377
1378 static uint16_t
1379 avp_recv_scattered_pkts(void *rx_queue,
1380                         struct rte_mbuf **rx_pkts,
1381                         uint16_t nb_pkts)
1382 {
1383         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1384         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1385         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1386         struct avp_dev *avp = rxq->avp;
1387         struct rte_avp_desc *pkt_buf;
1388         struct rte_avp_fifo *free_q;
1389         struct rte_avp_fifo *rx_q;
1390         struct rte_avp_desc *buf;
1391         unsigned int count, avail, n;
1392         unsigned int guest_mbuf_size;
1393         struct rte_mbuf *m;
1394         unsigned int required;
1395         unsigned int buf_len;
1396         unsigned int port_id;
1397         unsigned int i;
1398
1399         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1400                 /* VM live migration in progress */
1401                 return 0;
1402         }
1403
1404         guest_mbuf_size = avp->guest_mbuf_size;
1405         port_id = avp->port_id;
1406         rx_q = avp->rx_q[rxq->queue_id];
1407         free_q = avp->free_q[rxq->queue_id];
1408
1409         /* setup next queue to service */
1410         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1411                 (rxq->queue_id + 1) : rxq->queue_base;
1412
1413         /* determine how many slots are available in the free queue */
1414         count = avp_fifo_free_count(free_q);
1415
1416         /* determine how many packets are available in the rx queue */
1417         avail = avp_fifo_count(rx_q);
1418
1419         /* determine how many packets can be received */
1420         count = RTE_MIN(count, avail);
1421         count = RTE_MIN(count, nb_pkts);
1422         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1423
1424         if (unlikely(count == 0)) {
1425                 /* no free buffers, or no buffers on the rx queue */
1426                 return 0;
1427         }
1428
1429         /* retrieve pending packets */
1430         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1431         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1432                    count, rx_q);
1433
1434         count = 0;
1435         for (i = 0; i < n; i++) {
1436                 /* prefetch next entry while processing current one */
1437                 if (i + 1 < n) {
1438                         pkt_buf = avp_dev_translate_buffer(avp,
1439                                                            avp_bufs[i + 1]);
1440                         rte_prefetch0(pkt_buf);
1441                 }
1442                 buf = avp_bufs[i];
1443
1444                 /* Peek into the first buffer to determine the total length */
1445                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1446                 buf_len = pkt_buf->pkt_len;
1447
1448                 /* Allocate enough mbufs to receive the entire packet */
1449                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1450                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1451                         rxq->dev_data->rx_mbuf_alloc_failed++;
1452                         continue;
1453                 }
1454
1455                 /* Copy the data from the buffers to our mbufs */
1456                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1457
1458                 /* finalize mbuf */
1459                 m->port = port_id;
1460
1461                 if (_avp_mac_filter(avp, m) != 0) {
1462                         /* silently discard packets not destined to our MAC */
1463                         rte_pktmbuf_free(m);
1464                         continue;
1465                 }
1466
1467                 /* return new mbuf to caller */
1468                 rx_pkts[count++] = m;
1469                 rxq->bytes += buf_len;
1470         }
1471
1472         rxq->packets += count;
1473
1474         /* return the buffers to the free queue */
1475         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1476
1477         return count;
1478 }
1479
1480
1481 static uint16_t
1482 avp_recv_pkts(void *rx_queue,
1483               struct rte_mbuf **rx_pkts,
1484               uint16_t nb_pkts)
1485 {
1486         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1487         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1488         struct avp_dev *avp = rxq->avp;
1489         struct rte_avp_desc *pkt_buf;
1490         struct rte_avp_fifo *free_q;
1491         struct rte_avp_fifo *rx_q;
1492         unsigned int count, avail, n;
1493         unsigned int pkt_len;
1494         struct rte_mbuf *m;
1495         char *pkt_data;
1496         unsigned int i;
1497
1498         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1499                 /* VM live migration in progress */
1500                 return 0;
1501         }
1502
1503         rx_q = avp->rx_q[rxq->queue_id];
1504         free_q = avp->free_q[rxq->queue_id];
1505
1506         /* setup next queue to service */
1507         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1508                 (rxq->queue_id + 1) : rxq->queue_base;
1509
1510         /* determine how many slots are available in the free queue */
1511         count = avp_fifo_free_count(free_q);
1512
1513         /* determine how many packets are available in the rx queue */
1514         avail = avp_fifo_count(rx_q);
1515
1516         /* determine how many packets can be received */
1517         count = RTE_MIN(count, avail);
1518         count = RTE_MIN(count, nb_pkts);
1519         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1520
1521         if (unlikely(count == 0)) {
1522                 /* no free buffers, or no buffers on the rx queue */
1523                 return 0;
1524         }
1525
1526         /* retrieve pending packets */
1527         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1528         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1529                    count, rx_q);
1530
1531         count = 0;
1532         for (i = 0; i < n; i++) {
1533                 /* prefetch next entry while processing current one */
1534                 if (i < n - 1) {
1535                         pkt_buf = avp_dev_translate_buffer(avp,
1536                                                            avp_bufs[i + 1]);
1537                         rte_prefetch0(pkt_buf);
1538                 }
1539
1540                 /* Adjust host pointers for guest addressing */
1541                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1542                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1543                 pkt_len = pkt_buf->pkt_len;
1544
1545                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1546                              (pkt_buf->nb_segs > 1))) {
1547                         /*
1548                          * application should be using the scattered receive
1549                          * function
1550                          */
1551                         rxq->errors++;
1552                         continue;
1553                 }
1554
1555                 /* process each packet to be transmitted */
1556                 m = rte_pktmbuf_alloc(avp->pool);
1557                 if (unlikely(m == NULL)) {
1558                         rxq->dev_data->rx_mbuf_alloc_failed++;
1559                         continue;
1560                 }
1561
1562                 /* copy data out of the host buffer to our buffer */
1563                 m->data_off = RTE_PKTMBUF_HEADROOM;
1564                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1565
1566                 /* initialize the local mbuf */
1567                 rte_pktmbuf_data_len(m) = pkt_len;
1568                 rte_pktmbuf_pkt_len(m) = pkt_len;
1569                 m->port = avp->port_id;
1570
1571                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1572                         m->ol_flags = PKT_RX_VLAN;
1573                         m->vlan_tci = pkt_buf->vlan_tci;
1574                 }
1575
1576                 if (_avp_mac_filter(avp, m) != 0) {
1577                         /* silently discard packets not destined to our MAC */
1578                         rte_pktmbuf_free(m);
1579                         continue;
1580                 }
1581
1582                 /* return new mbuf to caller */
1583                 rx_pkts[count++] = m;
1584                 rxq->bytes += pkt_len;
1585         }
1586
1587         rxq->packets += count;
1588
1589         /* return the buffers to the free queue */
1590         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1591
1592         return count;
1593 }
1594
1595 /*
1596  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1597  * there are sufficient destination buffers to contain the entire source
1598  * packet.
1599  */
1600 static inline uint16_t
1601 avp_dev_copy_to_buffers(struct avp_dev *avp,
1602                         struct rte_mbuf *mbuf,
1603                         struct rte_avp_desc **buffers,
1604                         unsigned int count)
1605 {
1606         struct rte_avp_desc *previous_buf = NULL;
1607         struct rte_avp_desc *first_buf = NULL;
1608         struct rte_avp_desc *pkt_buf;
1609         struct rte_avp_desc *buf;
1610         size_t total_length;
1611         struct rte_mbuf *m;
1612         size_t copy_length;
1613         size_t src_offset;
1614         char *pkt_data;
1615         unsigned int i;
1616
1617         __rte_mbuf_sanity_check(mbuf, 1);
1618
1619         m = mbuf;
1620         src_offset = 0;
1621         total_length = rte_pktmbuf_pkt_len(m);
1622         for (i = 0; (i < count) && (m != NULL); i++) {
1623                 /* fill each destination buffer */
1624                 buf = buffers[i];
1625
1626                 if (i < count - 1) {
1627                         /* prefetch next entry while processing this one */
1628                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1629                         rte_prefetch0(pkt_buf);
1630                 }
1631
1632                 /* Adjust pointers for guest addressing */
1633                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1634                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1635
1636                 /* setup the buffer chain */
1637                 if (previous_buf != NULL)
1638                         previous_buf->next = buf;
1639                 else
1640                         first_buf = pkt_buf;
1641
1642                 previous_buf = pkt_buf;
1643
1644                 do {
1645                         /*
1646                          * copy as many source mbuf segments as will fit in the
1647                          * destination buffer.
1648                          */
1649                         copy_length = RTE_MIN((avp->host_mbuf_size -
1650                                                pkt_buf->data_len),
1651                                               (rte_pktmbuf_data_len(m) -
1652                                                src_offset));
1653                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1654                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1655                                                src_offset),
1656                                    copy_length);
1657                         pkt_buf->data_len += copy_length;
1658                         src_offset += copy_length;
1659
1660                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1661                                 /* need a new source buffer */
1662                                 m = m->next;
1663                                 src_offset = 0;
1664                         }
1665
1666                         if (unlikely(pkt_buf->data_len ==
1667                                      avp->host_mbuf_size)) {
1668                                 /* need a new destination buffer */
1669                                 break;
1670                         }
1671
1672                 } while (m != NULL);
1673         }
1674
1675         first_buf->nb_segs = count;
1676         first_buf->pkt_len = total_length;
1677
1678         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1679                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1680                 first_buf->vlan_tci = mbuf->vlan_tci;
1681         }
1682
1683         avp_dev_buffer_sanity_check(avp, buffers[0]);
1684
1685         return total_length;
1686 }
1687
1688
1689 static uint16_t
1690 avp_xmit_scattered_pkts(void *tx_queue,
1691                         struct rte_mbuf **tx_pkts,
1692                         uint16_t nb_pkts)
1693 {
1694         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1695                                        RTE_AVP_MAX_MBUF_SEGMENTS)] = {};
1696         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1697         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1698         struct avp_dev *avp = txq->avp;
1699         struct rte_avp_fifo *alloc_q;
1700         struct rte_avp_fifo *tx_q;
1701         unsigned int count, avail, n;
1702         unsigned int orig_nb_pkts;
1703         struct rte_mbuf *m;
1704         unsigned int required;
1705         unsigned int segments;
1706         unsigned int tx_bytes;
1707         unsigned int i;
1708
1709         orig_nb_pkts = nb_pkts;
1710         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1711                 /* VM live migration in progress */
1712                 /* TODO ... buffer for X packets then drop? */
1713                 txq->errors += nb_pkts;
1714                 return 0;
1715         }
1716
1717         tx_q = avp->tx_q[txq->queue_id];
1718         alloc_q = avp->alloc_q[txq->queue_id];
1719
1720         /* limit the number of transmitted packets to the max burst size */
1721         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1722                 nb_pkts = AVP_MAX_TX_BURST;
1723
1724         /* determine how many buffers are available to copy into */
1725         avail = avp_fifo_count(alloc_q);
1726         if (unlikely(avail > (AVP_MAX_TX_BURST *
1727                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1728                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1729
1730         /* determine how many slots are available in the transmit queue */
1731         count = avp_fifo_free_count(tx_q);
1732
1733         /* determine how many packets can be sent */
1734         nb_pkts = RTE_MIN(count, nb_pkts);
1735
1736         /* determine how many packets will fit in the available buffers */
1737         count = 0;
1738         segments = 0;
1739         for (i = 0; i < nb_pkts; i++) {
1740                 m = tx_pkts[i];
1741                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1742                         /* prefetch next entry while processing this one */
1743                         rte_prefetch0(tx_pkts[i + 1]);
1744                 }
1745                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1746                         avp->host_mbuf_size;
1747
1748                 if (unlikely((required == 0) ||
1749                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1750                         break;
1751                 else if (unlikely(required + segments > avail))
1752                         break;
1753                 segments += required;
1754                 count++;
1755         }
1756         nb_pkts = count;
1757
1758         if (unlikely(nb_pkts == 0)) {
1759                 /* no available buffers, or no space on the tx queue */
1760                 txq->errors += orig_nb_pkts;
1761                 return 0;
1762         }
1763
1764         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1765                    nb_pkts, tx_q);
1766
1767         /* retrieve sufficient send buffers */
1768         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1769         if (unlikely(n != segments)) {
1770                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1771                            "n=%u, segments=%u, orig=%u\n",
1772                            n, segments, orig_nb_pkts);
1773                 txq->errors += orig_nb_pkts;
1774                 return 0;
1775         }
1776
1777         tx_bytes = 0;
1778         count = 0;
1779         for (i = 0; i < nb_pkts; i++) {
1780                 /* process each packet to be transmitted */
1781                 m = tx_pkts[i];
1782
1783                 /* determine how many buffers are required for this packet */
1784                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1785                         avp->host_mbuf_size;
1786
1787                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1788                                                     &avp_bufs[count], required);
1789                 tx_bufs[i] = avp_bufs[count];
1790                 count += required;
1791
1792                 /* free the original mbuf */
1793                 rte_pktmbuf_free(m);
1794         }
1795
1796         txq->packets += nb_pkts;
1797         txq->bytes += tx_bytes;
1798
1799 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1800         for (i = 0; i < nb_pkts; i++)
1801                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1802 #endif
1803
1804         /* send the packets */
1805         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1806         if (unlikely(n != orig_nb_pkts))
1807                 txq->errors += (orig_nb_pkts - n);
1808
1809         return n;
1810 }
1811
1812
1813 static uint16_t
1814 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1815 {
1816         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1817         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1818         struct avp_dev *avp = txq->avp;
1819         struct rte_avp_desc *pkt_buf;
1820         struct rte_avp_fifo *alloc_q;
1821         struct rte_avp_fifo *tx_q;
1822         unsigned int count, avail, n;
1823         struct rte_mbuf *m;
1824         unsigned int pkt_len;
1825         unsigned int tx_bytes;
1826         char *pkt_data;
1827         unsigned int i;
1828
1829         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1830                 /* VM live migration in progress */
1831                 /* TODO ... buffer for X packets then drop?! */
1832                 txq->errors++;
1833                 return 0;
1834         }
1835
1836         tx_q = avp->tx_q[txq->queue_id];
1837         alloc_q = avp->alloc_q[txq->queue_id];
1838
1839         /* limit the number of transmitted packets to the max burst size */
1840         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1841                 nb_pkts = AVP_MAX_TX_BURST;
1842
1843         /* determine how many buffers are available to copy into */
1844         avail = avp_fifo_count(alloc_q);
1845
1846         /* determine how many slots are available in the transmit queue */
1847         count = avp_fifo_free_count(tx_q);
1848
1849         /* determine how many packets can be sent */
1850         count = RTE_MIN(count, avail);
1851         count = RTE_MIN(count, nb_pkts);
1852
1853         if (unlikely(count == 0)) {
1854                 /* no available buffers, or no space on the tx queue */
1855                 txq->errors += nb_pkts;
1856                 return 0;
1857         }
1858
1859         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1860                    count, tx_q);
1861
1862         /* retrieve sufficient send buffers */
1863         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1864         if (unlikely(n != count)) {
1865                 txq->errors++;
1866                 return 0;
1867         }
1868
1869         tx_bytes = 0;
1870         for (i = 0; i < count; i++) {
1871                 /* prefetch next entry while processing the current one */
1872                 if (i < count - 1) {
1873                         pkt_buf = avp_dev_translate_buffer(avp,
1874                                                            avp_bufs[i + 1]);
1875                         rte_prefetch0(pkt_buf);
1876                 }
1877
1878                 /* process each packet to be transmitted */
1879                 m = tx_pkts[i];
1880
1881                 /* Adjust pointers for guest addressing */
1882                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1883                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1884                 pkt_len = rte_pktmbuf_pkt_len(m);
1885
1886                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1887                                          (pkt_len > avp->host_mbuf_size))) {
1888                         /*
1889                          * application should be using the scattered transmit
1890                          * function; send it truncated to avoid the performance
1891                          * hit of having to manage returning the already
1892                          * allocated buffer to the free list.  This should not
1893                          * happen since the application should have set the
1894                          * max_rx_pkt_len based on its MTU and it should be
1895                          * policing its own packet sizes.
1896                          */
1897                         txq->errors++;
1898                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1899                                           avp->host_mbuf_size);
1900                 }
1901
1902                 /* copy data out of our mbuf and into the AVP buffer */
1903                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1904                 pkt_buf->pkt_len = pkt_len;
1905                 pkt_buf->data_len = pkt_len;
1906                 pkt_buf->nb_segs = 1;
1907                 pkt_buf->next = NULL;
1908
1909                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1910                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1911                         pkt_buf->vlan_tci = m->vlan_tci;
1912                 }
1913
1914                 tx_bytes += pkt_len;
1915
1916                 /* free the original mbuf */
1917                 rte_pktmbuf_free(m);
1918         }
1919
1920         txq->packets += count;
1921         txq->bytes += tx_bytes;
1922
1923         /* send the packets */
1924         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1925
1926         return n;
1927 }
1928
1929 static void
1930 avp_dev_rx_queue_release(void *rx_queue)
1931 {
1932         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1933         struct avp_dev *avp = rxq->avp;
1934         struct rte_eth_dev_data *data = avp->dev_data;
1935         unsigned int i;
1936
1937         for (i = 0; i < avp->num_rx_queues; i++) {
1938                 if (data->rx_queues[i] == rxq) {
1939                         rte_free(data->rx_queues[i]);
1940                         data->rx_queues[i] = NULL;
1941                 }
1942         }
1943 }
1944
1945 static void
1946 avp_dev_rx_queue_release_all(struct rte_eth_dev *eth_dev)
1947 {
1948         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1949         struct rte_eth_dev_data *data = avp->dev_data;
1950         unsigned int i;
1951
1952         for (i = 0; i < avp->num_rx_queues; i++) {
1953                 if (data->rx_queues[i]) {
1954                         rte_free(data->rx_queues[i]);
1955                         data->rx_queues[i] = NULL;
1956                 }
1957         }
1958 }
1959
1960 static void
1961 avp_dev_tx_queue_release(void *tx_queue)
1962 {
1963         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1964         struct avp_dev *avp = txq->avp;
1965         struct rte_eth_dev_data *data = avp->dev_data;
1966         unsigned int i;
1967
1968         for (i = 0; i < avp->num_tx_queues; i++) {
1969                 if (data->tx_queues[i] == txq) {
1970                         rte_free(data->tx_queues[i]);
1971                         data->tx_queues[i] = NULL;
1972                 }
1973         }
1974 }
1975
1976 static void
1977 avp_dev_tx_queue_release_all(struct rte_eth_dev *eth_dev)
1978 {
1979         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1980         struct rte_eth_dev_data *data = avp->dev_data;
1981         unsigned int i;
1982
1983         for (i = 0; i < avp->num_tx_queues; i++) {
1984                 if (data->tx_queues[i]) {
1985                         rte_free(data->tx_queues[i]);
1986                         data->tx_queues[i] = NULL;
1987                 }
1988         }
1989 }
1990
1991 static int
1992 avp_dev_configure(struct rte_eth_dev *eth_dev)
1993 {
1994         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1995         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1996         struct rte_avp_device_info *host_info;
1997         struct rte_avp_device_config config;
1998         int mask = 0;
1999         void *addr;
2000         int ret;
2001
2002         rte_spinlock_lock(&avp->lock);
2003         if (avp->flags & AVP_F_DETACHED) {
2004                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2005                 ret = -ENOTSUP;
2006                 goto unlock;
2007         }
2008
2009         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2010         host_info = (struct rte_avp_device_info *)addr;
2011
2012         /* Setup required number of queues */
2013         _avp_set_queue_counts(eth_dev);
2014
2015         mask = (ETH_VLAN_STRIP_MASK |
2016                 ETH_VLAN_FILTER_MASK |
2017                 ETH_VLAN_EXTEND_MASK);
2018         ret = avp_vlan_offload_set(eth_dev, mask);
2019         if (ret < 0) {
2020                 PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2021                             ret);
2022                 goto unlock;
2023         }
2024
2025         /* update device config */
2026         memset(&config, 0, sizeof(config));
2027         config.device_id = host_info->device_id;
2028         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2029         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2030         config.features = avp->features;
2031         config.num_tx_queues = avp->num_tx_queues;
2032         config.num_rx_queues = avp->num_rx_queues;
2033
2034         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2035         if (ret < 0) {
2036                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2037                             ret);
2038                 goto unlock;
2039         }
2040
2041         avp->flags |= AVP_F_CONFIGURED;
2042         ret = 0;
2043
2044 unlock:
2045         rte_spinlock_unlock(&avp->lock);
2046         return ret;
2047 }
2048
2049 static int
2050 avp_dev_start(struct rte_eth_dev *eth_dev)
2051 {
2052         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2053         int ret;
2054
2055         rte_spinlock_lock(&avp->lock);
2056         if (avp->flags & AVP_F_DETACHED) {
2057                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2058                 ret = -ENOTSUP;
2059                 goto unlock;
2060         }
2061
2062         /* update link state */
2063         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2064         if (ret < 0) {
2065                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2066                             ret);
2067                 goto unlock;
2068         }
2069
2070         /* remember current link state */
2071         avp->flags |= AVP_F_LINKUP;
2072
2073         ret = 0;
2074
2075 unlock:
2076         rte_spinlock_unlock(&avp->lock);
2077         return ret;
2078 }
2079
2080 static void
2081 avp_dev_stop(struct rte_eth_dev *eth_dev)
2082 {
2083         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2084         int ret;
2085
2086         rte_spinlock_lock(&avp->lock);
2087         if (avp->flags & AVP_F_DETACHED) {
2088                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2089                 goto unlock;
2090         }
2091
2092         /* remember current link state */
2093         avp->flags &= ~AVP_F_LINKUP;
2094
2095         /* update link state */
2096         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2097         if (ret < 0) {
2098                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2099                             ret);
2100         }
2101
2102 unlock:
2103         rte_spinlock_unlock(&avp->lock);
2104 }
2105
2106 static int
2107 avp_dev_close(struct rte_eth_dev *eth_dev)
2108 {
2109         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2110         int ret;
2111
2112         rte_spinlock_lock(&avp->lock);
2113         if (avp->flags & AVP_F_DETACHED) {
2114                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2115                 goto unlock;
2116         }
2117
2118         /* remember current link state */
2119         avp->flags &= ~AVP_F_LINKUP;
2120         avp->flags &= ~AVP_F_CONFIGURED;
2121
2122         ret = avp_dev_disable_interrupts(eth_dev);
2123         if (ret < 0) {
2124                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2125                 /* continue */
2126         }
2127
2128         /* update device state */
2129         ret = avp_dev_ctrl_shutdown(eth_dev);
2130         if (ret < 0) {
2131                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2132                             ret);
2133                 /* continue */
2134         }
2135
2136         /* release dynamic storage for rx/tx queues */
2137         avp_dev_rx_queue_release_all(eth_dev);
2138         avp_dev_tx_queue_release_all(eth_dev);
2139
2140 unlock:
2141         rte_spinlock_unlock(&avp->lock);
2142         return 0;
2143 }
2144
2145 static int
2146 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2147                                         __rte_unused int wait_to_complete)
2148 {
2149         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2150         struct rte_eth_link *link = &eth_dev->data->dev_link;
2151
2152         link->link_speed = ETH_SPEED_NUM_10G;
2153         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2154         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2155
2156         return -1;
2157 }
2158
2159 static int
2160 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2161 {
2162         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2163
2164         rte_spinlock_lock(&avp->lock);
2165         if ((avp->flags & AVP_F_PROMISC) == 0) {
2166                 avp->flags |= AVP_F_PROMISC;
2167                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2168                             eth_dev->data->port_id);
2169         }
2170         rte_spinlock_unlock(&avp->lock);
2171
2172         return 0;
2173 }
2174
2175 static int
2176 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2177 {
2178         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2179
2180         rte_spinlock_lock(&avp->lock);
2181         if ((avp->flags & AVP_F_PROMISC) != 0) {
2182                 avp->flags &= ~AVP_F_PROMISC;
2183                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2184                             eth_dev->data->port_id);
2185         }
2186         rte_spinlock_unlock(&avp->lock);
2187
2188         return 0;
2189 }
2190
2191 static int
2192 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2193                  struct rte_eth_dev_info *dev_info)
2194 {
2195         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2196
2197         dev_info->max_rx_queues = avp->max_rx_queues;
2198         dev_info->max_tx_queues = avp->max_tx_queues;
2199         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2200         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2201         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2202         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2203                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2204                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2205         }
2206
2207         return 0;
2208 }
2209
2210 static int
2211 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2212 {
2213         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2214         struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
2215         uint64_t offloads = dev_conf->rxmode.offloads;
2216
2217         if (mask & ETH_VLAN_STRIP_MASK) {
2218                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2219                         if (offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2220                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2221                         else
2222                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2223                 } else {
2224                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2225                 }
2226         }
2227
2228         if (mask & ETH_VLAN_FILTER_MASK) {
2229                 if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2230                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2231         }
2232
2233         if (mask & ETH_VLAN_EXTEND_MASK) {
2234                 if (offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2235                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2236         }
2237
2238         return 0;
2239 }
2240
2241 static int
2242 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2243 {
2244         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2245         unsigned int i;
2246
2247         for (i = 0; i < avp->num_rx_queues; i++) {
2248                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2249
2250                 if (rxq) {
2251                         stats->ipackets += rxq->packets;
2252                         stats->ibytes += rxq->bytes;
2253                         stats->ierrors += rxq->errors;
2254
2255                         stats->q_ipackets[i] += rxq->packets;
2256                         stats->q_ibytes[i] += rxq->bytes;
2257                         stats->q_errors[i] += rxq->errors;
2258                 }
2259         }
2260
2261         for (i = 0; i < avp->num_tx_queues; i++) {
2262                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2263
2264                 if (txq) {
2265                         stats->opackets += txq->packets;
2266                         stats->obytes += txq->bytes;
2267                         stats->oerrors += txq->errors;
2268
2269                         stats->q_opackets[i] += txq->packets;
2270                         stats->q_obytes[i] += txq->bytes;
2271                 }
2272         }
2273
2274         return 0;
2275 }
2276
2277 static int
2278 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2279 {
2280         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2281         unsigned int i;
2282
2283         for (i = 0; i < avp->num_rx_queues; i++) {
2284                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2285
2286                 if (rxq) {
2287                         rxq->bytes = 0;
2288                         rxq->packets = 0;
2289                         rxq->errors = 0;
2290                 }
2291         }
2292
2293         for (i = 0; i < avp->num_tx_queues; i++) {
2294                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2295
2296                 if (txq) {
2297                         txq->bytes = 0;
2298                         txq->packets = 0;
2299                         txq->errors = 0;
2300                 }
2301         }
2302
2303         return 0;
2304 }
2305
/*
 * Driver registration: register the rte_avp_pmd PCI driver under the name
 * net_avp, attach the pci_id_avp_map id table to it, and register the
 * driver log type "pmd.net.avp.driver" (NOTICE is passed as its level).
 */
RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
RTE_LOG_REGISTER(avp_logtype_driver, pmd.net.avp.driver, NOTICE);