net/mlx5: add Altivec Rx
[dpdk.git] / drivers / net / avp / avp_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2013-2017 Wind River Systems, Inc.
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10
11 #include <rte_ethdev_driver.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_malloc.h>
16 #include <rte_atomic.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
21 #include <rte_common.h>
22 #include <rte_cycles.h>
23 #include <rte_spinlock.h>
24 #include <rte_byteorder.h>
25 #include <rte_dev.h>
26 #include <rte_memory.h>
27 #include <rte_eal.h>
28 #include <rte_io.h>
29
30 #include "rte_avp_common.h"
31 #include "rte_avp_fifo.h"
32
33 #include "avp_logs.h"
34
/*
 * Driver log type.
 * NOTE(review): presumably consumed by the PMD_DRV_LOG macro from avp_logs.h
 * -- confirm against that header.
 */
int avp_logtype_driver;

/* Device (re)creation; also invoked by the attach path in this file. */
static int
avp_dev_create(struct rte_pci_device *pci_dev, struct rte_eth_dev *eth_dev);

/* Ethernet device operations (installed through avp_eth_dev_ops). */
static int
avp_dev_configure(struct rte_eth_dev *dev);
static int
avp_dev_start(struct rte_eth_dev *dev);
static void
avp_dev_stop(struct rte_eth_dev *dev);
static void
avp_dev_close(struct rte_eth_dev *dev);
static int
avp_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info);
static int
avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int
avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static int
avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int
avp_dev_promiscuous_disable(struct rte_eth_dev *dev);

/* Queue setup. */
static int
avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
                       uint16_t rx_queue_id,
                       uint16_t nb_rx_desc,
                       unsigned int socket_id,
                       const struct rte_eth_rxconf *rx_conf,
                       struct rte_mempool *pool);

static int
avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
                       uint16_t tx_queue_id,
                       uint16_t nb_tx_desc,
                       unsigned int socket_id,
                       const struct rte_eth_txconf *tx_conf);

/* Receive burst callbacks. */
static uint16_t
avp_recv_scattered_pkts(void *rx_queue,
                        struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts);

static uint16_t
avp_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);

/* Transmit burst callbacks. */
static uint16_t
avp_xmit_scattered_pkts(void *tx_queue,
                        struct rte_mbuf **tx_pkts,
                        uint16_t nb_pkts);

static uint16_t
avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

/* Queue teardown. */
static void
avp_dev_rx_queue_release(void *rxq);
static void
avp_dev_tx_queue_release(void *txq);

/* Statistics. */
static int
avp_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
static int
avp_dev_stats_reset(struct rte_eth_dev *dev);
86
87
/* Driver limits. */
#define AVP_MAX_RX_BURST   64
#define AVP_MAX_TX_BURST   64
#define AVP_MAX_MAC_ADDRS  1
#define AVP_MIN_RX_BUFSIZE RTE_ETHER_MIN_LEN

/*
 * Number of microseconds to sleep between successive checks of the response
 * queue while a request to the host is outstanding.
 */
#define AVP_REQUEST_DELAY_USECS (5000)

/*
 * Number of times the response queue is checked for completion before the
 * request is declared to have timed out.
 */
#define AVP_MAX_REQUEST_RETRY (100)

/* Version number reported by this PCI driver. */
#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
108
109 /*
110  * The set of PCI devices this driver supports
111  */
112 static const struct rte_pci_id pci_id_avp_map[] = {
113         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
114           .device_id = RTE_AVP_PCI_DEVICE_ID,
115           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
116           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
117           .class_id = RTE_CLASS_ANY_ID,
118         },
119
120         { .vendor_id = 0, /* sentinel */
121         },
122 };
123
124 /*
125  * dev_ops for avp, bare necessities for basic operation
126  */
127 static const struct eth_dev_ops avp_eth_dev_ops = {
128         .dev_configure       = avp_dev_configure,
129         .dev_start           = avp_dev_start,
130         .dev_stop            = avp_dev_stop,
131         .dev_close           = avp_dev_close,
132         .dev_infos_get       = avp_dev_info_get,
133         .vlan_offload_set    = avp_vlan_offload_set,
134         .stats_get           = avp_dev_stats_get,
135         .stats_reset         = avp_dev_stats_reset,
136         .link_update         = avp_dev_link_update,
137         .promiscuous_enable  = avp_dev_promiscuous_enable,
138         .promiscuous_disable = avp_dev_promiscuous_disable,
139         .rx_queue_setup      = avp_dev_rx_queue_setup,
140         .rx_queue_release    = avp_dev_rx_queue_release,
141         .tx_queue_setup      = avp_dev_tx_queue_setup,
142         .tx_queue_release    = avp_dev_tx_queue_release,
143 };
144
/*
 * AVP device flags (bits of avp_dev.flags).
 */
/* NOTE(review): presumably tracks promiscuous mode -- not referenced in view */
#define AVP_F_PROMISC    (1 << 1)
/* tested by the attach path before re-sending the configuration to the host */
#define AVP_F_CONFIGURED (1 << 2)
/* reported to the host as the interface up/down state on re-attach */
#define AVP_F_LINKUP     (1 << 3)
/* set by the detach path and cleared again by the attach path */
#define AVP_F_DETACHED   (1 << 4)

/* Marker value used to validate an AVP ethernet device structure */
#define AVP_ETHDEV_MAGIC 0x92972862
154
155 /*
156  * Defines the AVP device attributes which are attached to an RTE ethernet
157  * device
158  */
159 struct avp_dev {
160         uint32_t magic; /**< Memory validation marker */
161         uint64_t device_id; /**< Unique system identifier */
162         struct rte_ether_addr ethaddr; /**< Host specified MAC address */
163         struct rte_eth_dev_data *dev_data;
164         /**< Back pointer to ethernet device data */
165         volatile uint32_t flags; /**< Device operational flags */
166         uint16_t port_id; /**< Ethernet port identifier */
167         struct rte_mempool *pool; /**< pkt mbuf mempool */
168         unsigned int guest_mbuf_size; /**< local pool mbuf size */
169         unsigned int host_mbuf_size; /**< host mbuf size */
170         unsigned int max_rx_pkt_len; /**< maximum receive unit */
171         uint32_t host_features; /**< Supported feature bitmap */
172         uint32_t features; /**< Enabled feature bitmap */
173         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
174         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
175         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
176         unsigned int max_rx_queues; /**< Maximum number of receive queues */
177
178         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
179         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
180         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
181         /**< Allocated mbufs queue */
182         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
183         /**< To be freed mbufs queue */
184
185         /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
186         rte_spinlock_t lock;
187
188         /* For request & response */
189         struct rte_avp_fifo *req_q; /**< Request queue */
190         struct rte_avp_fifo *resp_q; /**< Response queue */
191         void *host_sync_addr; /**< (host) Req/Resp Mem address */
192         void *sync_addr; /**< Req/Resp Mem address */
193         void *host_mbuf_addr; /**< (host) MBUF pool start address */
194         void *mbuf_addr; /**< MBUF pool start address */
195 } __rte_cache_aligned;
196
197 /* RTE ethernet private data */
198 struct avp_adapter {
199         struct avp_dev avp;
200 } __rte_cache_aligned;
201
202
/* 32-bit MMIO register write (relaxed ordering) */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))

/* 32-bit MMIO register read (relaxed ordering) */
#define AVP_READ32(_addr) rte_read32_relaxed((_addr))

/*
 * Cast the ethernet device private data pointer to the embedded AVP device
 * object.  The argument is parenthesized so that the cast applies to the whole
 * argument expression (e.g. a conditional or pointer arithmetic) rather than
 * only to its first operand.
 */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
        (&((struct avp_adapter *)(adapter))->avp)
212
/*
 * Per-queue context handed to the receive and transmit burst callbacks.  A
 * device queue services the AVP fifos in the range [queue_base, queue_limit].
 */
struct avp_queue {
        /* Backpointer to ethernet device data */
        struct rte_eth_dev_data *dev_data;
        /* Backpointer to AVP device */
        struct avp_dev *avp;
        /* Queue identifier used for indexing the current queue */
        uint16_t queue_id;
        /* Base queue identifier for queue servicing */
        uint16_t queue_base;
        /* Maximum queue identifier for queue servicing */
        uint16_t queue_limit;

        /* Counters; NOTE(review): updated outside the visible code */
        uint64_t packets;
        uint64_t bytes;
        uint64_t errors;
};
232
233 /* send a request and wait for a response
234  *
235  * @warning must be called while holding the avp->lock spinlock.
236  */
237 static int
238 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
239 {
240         unsigned int retry = AVP_MAX_REQUEST_RETRY;
241         void *resp_addr = NULL;
242         unsigned int count;
243         int ret;
244
245         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
246
247         request->result = -ENOTSUP;
248
249         /* Discard any stale responses before starting a new request */
250         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
251                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
252
253         rte_memcpy(avp->sync_addr, request, sizeof(*request));
254         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
255         if (count < 1) {
256                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
257                             request->req_id);
258                 ret = -EBUSY;
259                 goto done;
260         }
261
262         while (retry--) {
263                 /* wait for a response */
264                 usleep(AVP_REQUEST_DELAY_USECS);
265
266                 count = avp_fifo_count(avp->resp_q);
267                 if (count >= 1) {
268                         /* response received */
269                         break;
270                 }
271
272                 if ((count < 1) && (retry == 0)) {
273                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
274                                     request->req_id);
275                         ret = -ETIME;
276                         goto done;
277                 }
278         }
279
280         /* retrieve the response */
281         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
282         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
283                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
284                             count, resp_addr, avp->host_sync_addr);
285                 ret = -ENODATA;
286                 goto done;
287         }
288
289         /* copy to user buffer */
290         rte_memcpy(request, avp->sync_addr, sizeof(*request));
291         ret = 0;
292
293         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
294                     request->result, request->req_id);
295
296 done:
297         return ret;
298 }
299
300 static int
301 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
302 {
303         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
304         struct rte_avp_request request;
305         int ret;
306
307         /* setup a link state change request */
308         memset(&request, 0, sizeof(request));
309         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
310         request.if_up = state;
311
312         ret = avp_dev_process_request(avp, &request);
313
314         return ret == 0 ? request.result : ret;
315 }
316
317 static int
318 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
319                         struct rte_avp_device_config *config)
320 {
321         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
322         struct rte_avp_request request;
323         int ret;
324
325         /* setup a configure request */
326         memset(&request, 0, sizeof(request));
327         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
328         memcpy(&request.config, config, sizeof(request.config));
329
330         ret = avp_dev_process_request(avp, &request);
331
332         return ret == 0 ? request.result : ret;
333 }
334
335 static int
336 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
337 {
338         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
339         struct rte_avp_request request;
340         int ret;
341
342         /* setup a shutdown request */
343         memset(&request, 0, sizeof(request));
344         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
345
346         ret = avp_dev_process_request(avp, &request);
347
348         return ret == 0 ? request.result : ret;
349 }
350
351 /* translate from host mbuf virtual address to guest virtual address */
352 static inline void *
353 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
354 {
355         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
356                                        (uintptr_t)avp->host_mbuf_addr),
357                            (uintptr_t)avp->mbuf_addr);
358 }
359
360 /* translate from host physical address to guest virtual address */
361 static void *
362 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
363                           rte_iova_t host_phys_addr)
364 {
365         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
366         struct rte_mem_resource *resource;
367         struct rte_avp_memmap_info *info;
368         struct rte_avp_memmap *map;
369         off_t offset;
370         void *addr;
371         unsigned int i;
372
373         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
374         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
375         info = (struct rte_avp_memmap_info *)resource->addr;
376
377         offset = 0;
378         for (i = 0; i < info->nb_maps; i++) {
379                 /* search all segments looking for a matching address */
380                 map = &info->maps[i];
381
382                 if ((host_phys_addr >= map->phys_addr) &&
383                         (host_phys_addr < (map->phys_addr + map->length))) {
384                         /* address is within this segment */
385                         offset += (host_phys_addr - map->phys_addr);
386                         addr = RTE_PTR_ADD(addr, (uintptr_t)offset);
387
388                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
389                                     host_phys_addr, addr);
390
391                         return addr;
392                 }
393                 offset += map->length;
394         }
395
396         return NULL;
397 }
398
399 /* verify that the incoming device version is compatible with our version */
400 static int
401 avp_dev_version_check(uint32_t version)
402 {
403         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
404         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
405
406         if (device <= driver) {
407                 /* the host driver version is less than or equal to ours */
408                 return 0;
409         }
410
411         return 1;
412 }
413
414 /* verify that memory regions have expected version and validation markers */
415 static int
416 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
417 {
418         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
419         struct rte_avp_memmap_info *memmap;
420         struct rte_avp_device_info *info;
421         struct rte_mem_resource *resource;
422         unsigned int i;
423
424         /* Dump resource info for debug */
425         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
426                 resource = &pci_dev->mem_resource[i];
427                 if ((resource->phys_addr == 0) || (resource->len == 0))
428                         continue;
429
430                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
431                             i, resource->phys_addr,
432                             resource->len, resource->addr);
433
434                 switch (i) {
435                 case RTE_AVP_PCI_MEMMAP_BAR:
436                         memmap = (struct rte_avp_memmap_info *)resource->addr;
437                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
438                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
439                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
440                                             memmap->magic, memmap->version);
441                                 return -EINVAL;
442                         }
443                         break;
444
445                 case RTE_AVP_PCI_DEVICE_BAR:
446                         info = (struct rte_avp_device_info *)resource->addr;
447                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
448                             avp_dev_version_check(info->version)) {
449                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
450                                             info->magic, info->version,
451                                             AVP_DPDK_DRIVER_VERSION);
452                                 return -EINVAL;
453                         }
454                         break;
455
456                 case RTE_AVP_PCI_MEMORY_BAR:
457                 case RTE_AVP_PCI_MMIO_BAR:
458                         if (resource->addr == NULL) {
459                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
460                                             i);
461                                 return -EINVAL;
462                         }
463                         break;
464
465                 case RTE_AVP_PCI_MSIX_BAR:
466                 default:
467                         /* no validation required */
468                         break;
469                 }
470         }
471
472         return 0;
473 }
474
475 static int
476 avp_dev_detach(struct rte_eth_dev *eth_dev)
477 {
478         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
479         int ret;
480
481         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
482                     eth_dev->data->port_id, avp->device_id);
483
484         rte_spinlock_lock(&avp->lock);
485
486         if (avp->flags & AVP_F_DETACHED) {
487                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
488                             eth_dev->data->port_id);
489                 ret = 0;
490                 goto unlock;
491         }
492
493         /* shutdown the device first so the host stops sending us packets. */
494         ret = avp_dev_ctrl_shutdown(eth_dev);
495         if (ret < 0) {
496                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
497                             ret);
498                 avp->flags &= ~AVP_F_DETACHED;
499                 goto unlock;
500         }
501
502         avp->flags |= AVP_F_DETACHED;
503         rte_wmb();
504
505         /* wait for queues to acknowledge the presence of the detach flag */
506         rte_delay_ms(1);
507
508         ret = 0;
509
510 unlock:
511         rte_spinlock_unlock(&avp->lock);
512         return ret;
513 }
514
515 static void
516 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
517 {
518         struct avp_dev *avp =
519                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
520         struct avp_queue *rxq;
521         uint16_t queue_count;
522         uint16_t remainder;
523
524         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
525
526         /*
527          * Must map all AVP fifos as evenly as possible between the configured
528          * device queues.  Each device queue will service a subset of the AVP
529          * fifos. If there is an odd number of device queues the first set of
530          * device queues will get the extra AVP fifos.
531          */
532         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
533         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
534         if (rx_queue_id < remainder) {
535                 /* these queues must service one extra FIFO */
536                 rxq->queue_base = rx_queue_id * (queue_count + 1);
537                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
538         } else {
539                 /* these queues service the regular number of FIFO */
540                 rxq->queue_base = ((remainder * (queue_count + 1)) +
541                                    ((rx_queue_id - remainder) * queue_count));
542                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
543         }
544
545         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
546                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
547
548         rxq->queue_id = rxq->queue_base;
549 }
550
551 static void
552 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
553 {
554         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
555         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
556         struct rte_avp_device_info *host_info;
557         void *addr;
558
559         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
560         host_info = (struct rte_avp_device_info *)addr;
561
562         /*
563          * the transmit direction is not negotiated beyond respecting the max
564          * number of queues because the host can handle arbitrary guest tx
565          * queues (host rx queues).
566          */
567         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
568
569         /*
570          * the receive direction is more restrictive.  The host requires a
571          * minimum number of guest rx queues (host tx queues) therefore
572          * negotiate a value that is at least as large as the host minimum
573          * requirement.  If the host and guest values are not identical then a
574          * mapping will be established in the receive_queue_setup function.
575          */
576         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
577                                      eth_dev->data->nb_rx_queues);
578
579         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
580                     avp->num_tx_queues, avp->num_rx_queues);
581 }
582
583 static int
584 avp_dev_attach(struct rte_eth_dev *eth_dev)
585 {
586         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
587         struct rte_avp_device_config config;
588         unsigned int i;
589         int ret;
590
591         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
592                     eth_dev->data->port_id, avp->device_id);
593
594         rte_spinlock_lock(&avp->lock);
595
596         if (!(avp->flags & AVP_F_DETACHED)) {
597                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
598                             eth_dev->data->port_id);
599                 ret = 0;
600                 goto unlock;
601         }
602
603         /*
604          * make sure that the detached flag is set prior to reconfiguring the
605          * queues.
606          */
607         avp->flags |= AVP_F_DETACHED;
608         rte_wmb();
609
610         /*
611          * re-run the device create utility which will parse the new host info
612          * and setup the AVP device queue pointers.
613          */
614         ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
615         if (ret < 0) {
616                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
617                             ret);
618                 goto unlock;
619         }
620
621         if (avp->flags & AVP_F_CONFIGURED) {
622                 /*
623                  * Update the receive queue mapping to handle cases where the
624                  * source and destination hosts have different queue
625                  * requirements.  As long as the DETACHED flag is asserted the
626                  * queue table should not be referenced so it should be safe to
627                  * update it.
628                  */
629                 _avp_set_queue_counts(eth_dev);
630                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
631                         _avp_set_rx_queue_mappings(eth_dev, i);
632
633                 /*
634                  * Update the host with our config details so that it knows the
635                  * device is active.
636                  */
637                 memset(&config, 0, sizeof(config));
638                 config.device_id = avp->device_id;
639                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
640                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
641                 config.features = avp->features;
642                 config.num_tx_queues = avp->num_tx_queues;
643                 config.num_rx_queues = avp->num_rx_queues;
644                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
645
646                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
647                 if (ret < 0) {
648                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
649                                     ret);
650                         goto unlock;
651                 }
652         }
653
654         rte_wmb();
655         avp->flags &= ~AVP_F_DETACHED;
656
657         ret = 0;
658
659 unlock:
660         rte_spinlock_unlock(&avp->lock);
661         return ret;
662 }
663
664 static void
665 avp_dev_interrupt_handler(void *data)
666 {
667         struct rte_eth_dev *eth_dev = data;
668         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
669         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
670         uint32_t status, value;
671         int ret;
672
673         if (registers == NULL)
674                 rte_panic("no mapped MMIO register space\n");
675
676         /* read the interrupt status register
677          * note: this register clears on read so all raised interrupts must be
678          *    handled or remembered for later processing
679          */
680         status = AVP_READ32(
681                 RTE_PTR_ADD(registers,
682                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
683
684         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
685                 /* handle interrupt based on current status */
686                 value = AVP_READ32(
687                         RTE_PTR_ADD(registers,
688                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
689                 switch (value) {
690                 case RTE_AVP_MIGRATION_DETACHED:
691                         ret = avp_dev_detach(eth_dev);
692                         break;
693                 case RTE_AVP_MIGRATION_ATTACHED:
694                         ret = avp_dev_attach(eth_dev);
695                         break;
696                 default:
697                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
698                                     value);
699                         ret = -EINVAL;
700                 }
701
702                 /* acknowledge the request by writing out our current status */
703                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
704                 AVP_WRITE32(value,
705                             RTE_PTR_ADD(registers,
706                                         RTE_AVP_MIGRATION_ACK_OFFSET));
707
708                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
709         }
710
711         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
712                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
713                             status);
714
715         /* re-enable UIO interrupt handling */
716         ret = rte_intr_ack(&pci_dev->intr_handle);
717         if (ret < 0) {
718                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
719                             ret);
720                 /* continue */
721         }
722 }
723
724 static int
725 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
726 {
727         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
728         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
729         int ret;
730
731         if (registers == NULL)
732                 return -EINVAL;
733
734         /* enable UIO interrupt handling */
735         ret = rte_intr_enable(&pci_dev->intr_handle);
736         if (ret < 0) {
737                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
738                             ret);
739                 return ret;
740         }
741
742         /* inform the device that all interrupts are enabled */
743         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
744                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
745
746         return 0;
747 }
748
749 static int
750 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
751 {
752         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
753         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
754         int ret;
755
756         if (registers == NULL)
757                 return 0;
758
759         /* inform the device that all interrupts are disabled */
760         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
761                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
762
763         /* enable UIO interrupt handling */
764         ret = rte_intr_disable(&pci_dev->intr_handle);
765         if (ret < 0) {
766                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
767                             ret);
768                 return ret;
769         }
770
771         return 0;
772 }
773
774 static int
775 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
776 {
777         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
778         int ret;
779
780         /* register a callback handler with UIO for interrupt notifications */
781         ret = rte_intr_callback_register(&pci_dev->intr_handle,
782                                          avp_dev_interrupt_handler,
783                                          (void *)eth_dev);
784         if (ret < 0) {
785                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
786                             ret);
787                 return ret;
788         }
789
790         /* enable interrupt processing */
791         return avp_dev_enable_interrupts(eth_dev);
792 }
793
794 static int
795 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
796 {
797         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
798         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
799         uint32_t value;
800
801         if (registers == NULL)
802                 return 0;
803
804         value = AVP_READ32(RTE_PTR_ADD(registers,
805                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
806         if (value == RTE_AVP_MIGRATION_DETACHED) {
807                 /* migration is in progress; ack it if we have not already */
808                 AVP_WRITE32(value,
809                             RTE_PTR_ADD(registers,
810                                         RTE_AVP_MIGRATION_ACK_OFFSET));
811                 return 1;
812         }
813         return 0;
814 }
815
816 /*
817  * create a AVP device using the supplied device info by first translating it
818  * to guest address space(s).
819  */
820 static int
821 avp_dev_create(struct rte_pci_device *pci_dev,
822                struct rte_eth_dev *eth_dev)
823 {
824         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
825         struct rte_avp_device_info *host_info;
826         struct rte_mem_resource *resource;
827         unsigned int i;
828
829         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
830         if (resource->addr == NULL) {
831                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
832                             RTE_AVP_PCI_DEVICE_BAR);
833                 return -EFAULT;
834         }
835         host_info = (struct rte_avp_device_info *)resource->addr;
836
837         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
838                 avp_dev_version_check(host_info->version)) {
839                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
840                             host_info->magic, host_info->version,
841                             AVP_DPDK_DRIVER_VERSION);
842                 return -EINVAL;
843         }
844
845         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
846                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
847                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
848                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
849
850         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
851                     host_info->min_tx_queues, host_info->max_tx_queues);
852         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
853                     host_info->min_rx_queues, host_info->max_rx_queues);
854         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
855                     host_info->features);
856
857         if (avp->magic != AVP_ETHDEV_MAGIC) {
858                 /*
859                  * First time initialization (i.e., not during a VM
860                  * migration)
861                  */
862                 memset(avp, 0, sizeof(*avp));
863                 avp->magic = AVP_ETHDEV_MAGIC;
864                 avp->dev_data = eth_dev->data;
865                 avp->port_id = eth_dev->data->port_id;
866                 avp->host_mbuf_size = host_info->mbuf_size;
867                 avp->host_features = host_info->features;
868                 rte_spinlock_init(&avp->lock);
869                 memcpy(&avp->ethaddr.addr_bytes[0],
870                        host_info->ethaddr, RTE_ETHER_ADDR_LEN);
871                 /* adjust max values to not exceed our max */
872                 avp->max_tx_queues =
873                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
874                 avp->max_rx_queues =
875                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
876         } else {
877                 /* Re-attaching during migration */
878
879                 /* TODO... requires validation of host values */
880                 if ((host_info->features & avp->features) != avp->features) {
881                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
882                                     avp->features, host_info->features);
883                         /* this should not be possible; continue for now */
884                 }
885         }
886
887         /* the device id is allowed to change over migrations */
888         avp->device_id = host_info->device_id;
889
890         /* translate incoming host addresses to guest address space */
891         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
892                     host_info->tx_phys);
893         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
894                     host_info->alloc_phys);
895         for (i = 0; i < avp->max_tx_queues; i++) {
896                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
897                         host_info->tx_phys + (i * host_info->tx_size));
898
899                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
900                         host_info->alloc_phys + (i * host_info->alloc_size));
901         }
902
903         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
904                     host_info->rx_phys);
905         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
906                     host_info->free_phys);
907         for (i = 0; i < avp->max_rx_queues; i++) {
908                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
909                         host_info->rx_phys + (i * host_info->rx_size));
910                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
911                         host_info->free_phys + (i * host_info->free_size));
912         }
913
914         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
915                     host_info->req_phys);
916         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
917                     host_info->resp_phys);
918         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
919                     host_info->sync_phys);
920         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
921                     host_info->mbuf_phys);
922         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
923         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
924         avp->sync_addr =
925                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
926         avp->mbuf_addr =
927                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
928
929         /*
930          * store the host mbuf virtual address so that we can calculate
931          * relative offsets for each mbuf as they are processed
932          */
933         avp->host_mbuf_addr = host_info->mbuf_va;
934         avp->host_sync_addr = host_info->sync_va;
935
936         /*
937          * store the maximum packet length that is supported by the host.
938          */
939         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
940         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
941                                 host_info->max_rx_pkt_len);
942
943         return 0;
944 }
945
946 /*
947  * This function is based on probe() function in avp_pci.c
948  * It returns 0 on success.
949  */
950 static int
951 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
952 {
953         struct avp_dev *avp =
954                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
955         struct rte_pci_device *pci_dev;
956         int ret;
957
958         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
959         eth_dev->dev_ops = &avp_eth_dev_ops;
960         eth_dev->rx_pkt_burst = &avp_recv_pkts;
961         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
962         /* Let rte_eth_dev_close() release the port resources */
963         eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
964
965         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
966                 /*
967                  * no setup required on secondary processes.  All data is saved
968                  * in dev_private by the primary process. All resource should
969                  * be mapped to the same virtual address so all pointers should
970                  * be valid.
971                  */
972                 if (eth_dev->data->scattered_rx) {
973                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
974                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
975                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
976                 }
977                 return 0;
978         }
979
980         rte_eth_copy_pci_info(eth_dev, pci_dev);
981
982         /* Check current migration status */
983         if (avp_dev_migration_pending(eth_dev)) {
984                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
985                 return -EBUSY;
986         }
987
988         /* Check BAR resources */
989         ret = avp_dev_check_regions(eth_dev);
990         if (ret < 0) {
991                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
992                             ret);
993                 return ret;
994         }
995
996         /* Enable interrupts */
997         ret = avp_dev_setup_interrupts(eth_dev);
998         if (ret < 0) {
999                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1000                 return ret;
1001         }
1002
1003         /* Handle each subtype */
1004         ret = avp_dev_create(pci_dev, eth_dev);
1005         if (ret < 0) {
1006                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1007                 return ret;
1008         }
1009
1010         /* Allocate memory for storing MAC addresses */
1011         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev",
1012                                         RTE_ETHER_ADDR_LEN, 0);
1013         if (eth_dev->data->mac_addrs == NULL) {
1014                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1015                             RTE_ETHER_ADDR_LEN);
1016                 return -ENOMEM;
1017         }
1018
1019         /* Get a mac from device config */
1020         rte_ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1021
1022         return 0;
1023 }
1024
1025 static int
1026 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1027 {
1028         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1029                 return -EPERM;
1030
1031         if (eth_dev->data == NULL)
1032                 return 0;
1033
1034         avp_dev_close(eth_dev);
1035
1036         return 0;
1037 }
1038
1039 static int
1040 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1041                   struct rte_pci_device *pci_dev)
1042 {
1043         return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct avp_adapter),
1044                         eth_avp_dev_init);
1045 }
1046
1047 static int
1048 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1049 {
1050         return rte_eth_dev_pci_generic_remove(pci_dev,
1051                                               eth_avp_dev_uninit);
1052 }
1053
1054 static struct rte_pci_driver rte_avp_pmd = {
1055         .id_table = pci_id_avp_map,
1056         .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1057         .probe = eth_avp_pci_probe,
1058         .remove = eth_avp_pci_remove,
1059 };
1060
1061 static int
1062 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1063                          struct avp_dev *avp)
1064 {
1065         unsigned int max_rx_pkt_len;
1066
1067         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1068
1069         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1070             (max_rx_pkt_len > avp->host_mbuf_size)) {
1071                 /*
1072                  * If the guest MTU is greater than either the host or guest
1073                  * buffers then chained mbufs have to be enabled in the TX
1074                  * direction.  It is assumed that the application will not need
1075                  * to send packets larger than their max_rx_pkt_len (MRU).
1076                  */
1077                 return 1;
1078         }
1079
1080         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1081             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1082                 /*
1083                  * If the host MRU is greater than its own mbuf size or the
1084                  * guest mbuf size then chained mbufs have to be enabled in the
1085                  * RX direction.
1086                  */
1087                 return 1;
1088         }
1089
1090         return 0;
1091 }
1092
1093 static int
1094 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1095                        uint16_t rx_queue_id,
1096                        uint16_t nb_rx_desc,
1097                        unsigned int socket_id,
1098                        const struct rte_eth_rxconf *rx_conf,
1099                        struct rte_mempool *pool)
1100 {
1101         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1102         struct rte_pktmbuf_pool_private *mbp_priv;
1103         struct avp_queue *rxq;
1104
1105         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1106                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1107                             rx_queue_id, eth_dev->data->nb_rx_queues);
1108                 return -EINVAL;
1109         }
1110
1111         /* Save mbuf pool pointer */
1112         avp->pool = pool;
1113
1114         /* Save the local mbuf size */
1115         mbp_priv = rte_mempool_get_priv(pool);
1116         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1117         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1118
1119         if (avp_dev_enable_scattered(eth_dev, avp)) {
1120                 if (!eth_dev->data->scattered_rx) {
1121                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1122                         eth_dev->data->scattered_rx = 1;
1123                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1124                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1125                 }
1126         }
1127
1128         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1129                     avp->max_rx_pkt_len,
1130                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1131                     avp->host_mbuf_size,
1132                     avp->guest_mbuf_size);
1133
1134         /* allocate a queue object */
1135         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1136                                  RTE_CACHE_LINE_SIZE, socket_id);
1137         if (rxq == NULL) {
1138                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1139                 return -ENOMEM;
1140         }
1141
1142         /* save back pointers to AVP and Ethernet devices */
1143         rxq->avp = avp;
1144         rxq->dev_data = eth_dev->data;
1145         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1146
1147         /* setup the queue receive mapping for the current queue. */
1148         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1149
1150         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1151
1152         (void)nb_rx_desc;
1153         (void)rx_conf;
1154         return 0;
1155 }
1156
1157 static int
1158 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1159                        uint16_t tx_queue_id,
1160                        uint16_t nb_tx_desc,
1161                        unsigned int socket_id,
1162                        const struct rte_eth_txconf *tx_conf)
1163 {
1164         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1165         struct avp_queue *txq;
1166
1167         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1168                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1169                             tx_queue_id, eth_dev->data->nb_tx_queues);
1170                 return -EINVAL;
1171         }
1172
1173         /* allocate a queue object */
1174         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1175                                  RTE_CACHE_LINE_SIZE, socket_id);
1176         if (txq == NULL) {
1177                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1178                 return -ENOMEM;
1179         }
1180
1181         /* only the configured set of transmit queues are used */
1182         txq->queue_id = tx_queue_id;
1183         txq->queue_base = tx_queue_id;
1184         txq->queue_limit = tx_queue_id;
1185
1186         /* save back pointers to AVP and Ethernet devices */
1187         txq->avp = avp;
1188         txq->dev_data = eth_dev->data;
1189         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1190
1191         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1192
1193         (void)nb_tx_desc;
1194         (void)tx_conf;
1195         return 0;
1196 }
1197
1198 static inline int
1199 _avp_cmp_ether_addr(struct rte_ether_addr *a, struct rte_ether_addr *b)
1200 {
1201         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1202         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1203         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1204 }
1205
1206 static inline int
1207 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1208 {
1209         struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1210
1211         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1212                 /* allow all packets destined to our address */
1213                 return 0;
1214         }
1215
1216         if (likely(rte_is_broadcast_ether_addr(&eth->d_addr))) {
1217                 /* allow all broadcast packets */
1218                 return 0;
1219         }
1220
1221         if (likely(rte_is_multicast_ether_addr(&eth->d_addr))) {
1222                 /* allow all multicast packets */
1223                 return 0;
1224         }
1225
1226         if (avp->flags & AVP_F_PROMISC) {
1227                 /* allow all packets when in promiscuous mode */
1228                 return 0;
1229         }
1230
1231         return -1;
1232 }
1233
#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
/*
 * Debug-only consistency check of a host buffer chain.
 *
 * Walks the chain starting at buf and panics if a segment translates to
 * NULL, has a NULL translated data pointer or a zero data length, or if the
 * number of segments or the summed data length disagrees with nb_segs or
 * pkt_len of the first buffer.
 */
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
        struct rte_avp_desc *head;
        struct rte_avp_desc *seg;
        unsigned int remaining;
        unsigned int total = 0;
        unsigned int idx = 0;
        void *seg_data;

        head = avp_dev_translate_buffer(avp, buf);
        remaining = head->nb_segs;

        for (;;) {
                /* Adjust pointers for guest addressing */
                seg = avp_dev_translate_buffer(avp, buf);
                if (seg == NULL)
                        rte_panic("bad buffer: segment %u has an invalid address %p\n",
                                  idx, buf);

                seg_data = avp_dev_translate_buffer(avp, seg->data);
                if (seg_data == NULL)
                        rte_panic("bad buffer: segment %u has a NULL data pointer\n",
                                  idx);

                if (seg->data_len == 0)
                        rte_panic("bad buffer: segment %u has 0 data length\n",
                                  idx);

                total += seg->data_len;
                remaining--;
                idx++;

                /* stop once the advertised segment count is consumed ... */
                if (remaining == 0)
                        break;

                /* ... or when the chain ends early */
                buf = seg->next;
                if (buf == NULL)
                        break;
        }

        if (remaining != 0)
                rte_panic("bad buffer: expected %u segments found %u\n",
                          head->nb_segs, (head->nb_segs - remaining));
        if (total != head->pkt_len)
                rte_panic("bad buffer: expected length %u found %u\n",
                          head->pkt_len, total);
}

#define avp_dev_buffer_sanity_check(a, b) \
        __avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* buffer checks compile to nothing when debugging is disabled */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif
1285
1286 /*
1287  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1288  * there exactly the required number of mbufs to copy all source bytes.
1289  */
1290 static inline struct rte_mbuf *
1291 avp_dev_copy_from_buffers(struct avp_dev *avp,
1292                           struct rte_avp_desc *buf,
1293                           struct rte_mbuf **mbufs,
1294                           unsigned int count)
1295 {
1296         struct rte_mbuf *m_previous = NULL;
1297         struct rte_avp_desc *pkt_buf;
1298         unsigned int total_length = 0;
1299         unsigned int copy_length;
1300         unsigned int src_offset;
1301         struct rte_mbuf *m;
1302         uint16_t ol_flags;
1303         uint16_t vlan_tci;
1304         void *pkt_data;
1305         unsigned int i;
1306
1307         avp_dev_buffer_sanity_check(avp, buf);
1308
1309         /* setup the first source buffer */
1310         pkt_buf = avp_dev_translate_buffer(avp, buf);
1311         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1312         total_length = pkt_buf->pkt_len;
1313         src_offset = 0;
1314
1315         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1316                 ol_flags = PKT_RX_VLAN;
1317                 vlan_tci = pkt_buf->vlan_tci;
1318         } else {
1319                 ol_flags = 0;
1320                 vlan_tci = 0;
1321         }
1322
1323         for (i = 0; (i < count) && (buf != NULL); i++) {
1324                 /* fill each destination buffer */
1325                 m = mbufs[i];
1326
1327                 if (m_previous != NULL)
1328                         m_previous->next = m;
1329
1330                 m_previous = m;
1331
1332                 do {
1333                         /*
1334                          * Copy as many source buffers as will fit in the
1335                          * destination buffer.
1336                          */
1337                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1338                                                rte_pktmbuf_data_len(m)),
1339                                               (pkt_buf->data_len -
1340                                                src_offset));
1341                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1342                                                rte_pktmbuf_data_len(m)),
1343                                    RTE_PTR_ADD(pkt_data, src_offset),
1344                                    copy_length);
1345                         rte_pktmbuf_data_len(m) += copy_length;
1346                         src_offset += copy_length;
1347
1348                         if (likely(src_offset == pkt_buf->data_len)) {
1349                                 /* need a new source buffer */
1350                                 buf = pkt_buf->next;
1351                                 if (buf != NULL) {
1352                                         pkt_buf = avp_dev_translate_buffer(
1353                                                 avp, buf);
1354                                         pkt_data = avp_dev_translate_buffer(
1355                                                 avp, pkt_buf->data);
1356                                         src_offset = 0;
1357                                 }
1358                         }
1359
1360                         if (unlikely(rte_pktmbuf_data_len(m) ==
1361                                      avp->guest_mbuf_size)) {
1362                                 /* need a new destination mbuf */
1363                                 break;
1364                         }
1365
1366                 } while (buf != NULL);
1367         }
1368
1369         m = mbufs[0];
1370         m->ol_flags = ol_flags;
1371         m->nb_segs = count;
1372         rte_pktmbuf_pkt_len(m) = total_length;
1373         m->vlan_tci = vlan_tci;
1374
1375         __rte_mbuf_sanity_check(m, 1);
1376
1377         return m;
1378 }
1379
1380 static uint16_t
1381 avp_recv_scattered_pkts(void *rx_queue,
1382                         struct rte_mbuf **rx_pkts,
1383                         uint16_t nb_pkts)
1384 {
1385         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1386         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1387         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1388         struct avp_dev *avp = rxq->avp;
1389         struct rte_avp_desc *pkt_buf;
1390         struct rte_avp_fifo *free_q;
1391         struct rte_avp_fifo *rx_q;
1392         struct rte_avp_desc *buf;
1393         unsigned int count, avail, n;
1394         unsigned int guest_mbuf_size;
1395         struct rte_mbuf *m;
1396         unsigned int required;
1397         unsigned int buf_len;
1398         unsigned int port_id;
1399         unsigned int i;
1400
1401         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1402                 /* VM live migration in progress */
1403                 return 0;
1404         }
1405
1406         guest_mbuf_size = avp->guest_mbuf_size;
1407         port_id = avp->port_id;
1408         rx_q = avp->rx_q[rxq->queue_id];
1409         free_q = avp->free_q[rxq->queue_id];
1410
1411         /* setup next queue to service */
1412         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1413                 (rxq->queue_id + 1) : rxq->queue_base;
1414
1415         /* determine how many slots are available in the free queue */
1416         count = avp_fifo_free_count(free_q);
1417
1418         /* determine how many packets are available in the rx queue */
1419         avail = avp_fifo_count(rx_q);
1420
1421         /* determine how many packets can be received */
1422         count = RTE_MIN(count, avail);
1423         count = RTE_MIN(count, nb_pkts);
1424         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1425
1426         if (unlikely(count == 0)) {
1427                 /* no free buffers, or no buffers on the rx queue */
1428                 return 0;
1429         }
1430
1431         /* retrieve pending packets */
1432         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1433         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1434                    count, rx_q);
1435
1436         count = 0;
1437         for (i = 0; i < n; i++) {
1438                 /* prefetch next entry while processing current one */
1439                 if (i + 1 < n) {
1440                         pkt_buf = avp_dev_translate_buffer(avp,
1441                                                            avp_bufs[i + 1]);
1442                         rte_prefetch0(pkt_buf);
1443                 }
1444                 buf = avp_bufs[i];
1445
1446                 /* Peek into the first buffer to determine the total length */
1447                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1448                 buf_len = pkt_buf->pkt_len;
1449
1450                 /* Allocate enough mbufs to receive the entire packet */
1451                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1452                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1453                         rxq->dev_data->rx_mbuf_alloc_failed++;
1454                         continue;
1455                 }
1456
1457                 /* Copy the data from the buffers to our mbufs */
1458                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1459
1460                 /* finalize mbuf */
1461                 m->port = port_id;
1462
1463                 if (_avp_mac_filter(avp, m) != 0) {
1464                         /* silently discard packets not destined to our MAC */
1465                         rte_pktmbuf_free(m);
1466                         continue;
1467                 }
1468
1469                 /* return new mbuf to caller */
1470                 rx_pkts[count++] = m;
1471                 rxq->bytes += buf_len;
1472         }
1473
1474         rxq->packets += count;
1475
1476         /* return the buffers to the free queue */
1477         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1478
1479         return count;
1480 }
1481
1482
1483 static uint16_t
1484 avp_recv_pkts(void *rx_queue,
1485               struct rte_mbuf **rx_pkts,
1486               uint16_t nb_pkts)
1487 {
1488         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1489         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1490         struct avp_dev *avp = rxq->avp;
1491         struct rte_avp_desc *pkt_buf;
1492         struct rte_avp_fifo *free_q;
1493         struct rte_avp_fifo *rx_q;
1494         unsigned int count, avail, n;
1495         unsigned int pkt_len;
1496         struct rte_mbuf *m;
1497         char *pkt_data;
1498         unsigned int i;
1499
1500         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1501                 /* VM live migration in progress */
1502                 return 0;
1503         }
1504
1505         rx_q = avp->rx_q[rxq->queue_id];
1506         free_q = avp->free_q[rxq->queue_id];
1507
1508         /* setup next queue to service */
1509         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1510                 (rxq->queue_id + 1) : rxq->queue_base;
1511
1512         /* determine how many slots are available in the free queue */
1513         count = avp_fifo_free_count(free_q);
1514
1515         /* determine how many packets are available in the rx queue */
1516         avail = avp_fifo_count(rx_q);
1517
1518         /* determine how many packets can be received */
1519         count = RTE_MIN(count, avail);
1520         count = RTE_MIN(count, nb_pkts);
1521         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1522
1523         if (unlikely(count == 0)) {
1524                 /* no free buffers, or no buffers on the rx queue */
1525                 return 0;
1526         }
1527
1528         /* retrieve pending packets */
1529         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1530         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1531                    count, rx_q);
1532
1533         count = 0;
1534         for (i = 0; i < n; i++) {
1535                 /* prefetch next entry while processing current one */
1536                 if (i < n - 1) {
1537                         pkt_buf = avp_dev_translate_buffer(avp,
1538                                                            avp_bufs[i + 1]);
1539                         rte_prefetch0(pkt_buf);
1540                 }
1541
1542                 /* Adjust host pointers for guest addressing */
1543                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1544                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1545                 pkt_len = pkt_buf->pkt_len;
1546
1547                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1548                              (pkt_buf->nb_segs > 1))) {
1549                         /*
1550                          * application should be using the scattered receive
1551                          * function
1552                          */
1553                         rxq->errors++;
1554                         continue;
1555                 }
1556
1557                 /* process each packet to be transmitted */
1558                 m = rte_pktmbuf_alloc(avp->pool);
1559                 if (unlikely(m == NULL)) {
1560                         rxq->dev_data->rx_mbuf_alloc_failed++;
1561                         continue;
1562                 }
1563
1564                 /* copy data out of the host buffer to our buffer */
1565                 m->data_off = RTE_PKTMBUF_HEADROOM;
1566                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1567
1568                 /* initialize the local mbuf */
1569                 rte_pktmbuf_data_len(m) = pkt_len;
1570                 rte_pktmbuf_pkt_len(m) = pkt_len;
1571                 m->port = avp->port_id;
1572
1573                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1574                         m->ol_flags = PKT_RX_VLAN;
1575                         m->vlan_tci = pkt_buf->vlan_tci;
1576                 }
1577
1578                 if (_avp_mac_filter(avp, m) != 0) {
1579                         /* silently discard packets not destined to our MAC */
1580                         rte_pktmbuf_free(m);
1581                         continue;
1582                 }
1583
1584                 /* return new mbuf to caller */
1585                 rx_pkts[count++] = m;
1586                 rxq->bytes += pkt_len;
1587         }
1588
1589         rxq->packets += count;
1590
1591         /* return the buffers to the free queue */
1592         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1593
1594         return count;
1595 }
1596
1597 /*
1598  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1599  * there are sufficient destination buffers to contain the entire source
1600  * packet.
1601  */
1602 static inline uint16_t
1603 avp_dev_copy_to_buffers(struct avp_dev *avp,
1604                         struct rte_mbuf *mbuf,
1605                         struct rte_avp_desc **buffers,
1606                         unsigned int count)
1607 {
1608         struct rte_avp_desc *previous_buf = NULL;
1609         struct rte_avp_desc *first_buf = NULL;
1610         struct rte_avp_desc *pkt_buf;
1611         struct rte_avp_desc *buf;
1612         size_t total_length;
1613         struct rte_mbuf *m;
1614         size_t copy_length;
1615         size_t src_offset;
1616         char *pkt_data;
1617         unsigned int i;
1618
1619         __rte_mbuf_sanity_check(mbuf, 1);
1620
1621         m = mbuf;
1622         src_offset = 0;
1623         total_length = rte_pktmbuf_pkt_len(m);
1624         for (i = 0; (i < count) && (m != NULL); i++) {
1625                 /* fill each destination buffer */
1626                 buf = buffers[i];
1627
1628                 if (i < count - 1) {
1629                         /* prefetch next entry while processing this one */
1630                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1631                         rte_prefetch0(pkt_buf);
1632                 }
1633
1634                 /* Adjust pointers for guest addressing */
1635                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1636                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1637
1638                 /* setup the buffer chain */
1639                 if (previous_buf != NULL)
1640                         previous_buf->next = buf;
1641                 else
1642                         first_buf = pkt_buf;
1643
1644                 previous_buf = pkt_buf;
1645
1646                 do {
1647                         /*
1648                          * copy as many source mbuf segments as will fit in the
1649                          * destination buffer.
1650                          */
1651                         copy_length = RTE_MIN((avp->host_mbuf_size -
1652                                                pkt_buf->data_len),
1653                                               (rte_pktmbuf_data_len(m) -
1654                                                src_offset));
1655                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1656                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1657                                                src_offset),
1658                                    copy_length);
1659                         pkt_buf->data_len += copy_length;
1660                         src_offset += copy_length;
1661
1662                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1663                                 /* need a new source buffer */
1664                                 m = m->next;
1665                                 src_offset = 0;
1666                         }
1667
1668                         if (unlikely(pkt_buf->data_len ==
1669                                      avp->host_mbuf_size)) {
1670                                 /* need a new destination buffer */
1671                                 break;
1672                         }
1673
1674                 } while (m != NULL);
1675         }
1676
1677         first_buf->nb_segs = count;
1678         first_buf->pkt_len = total_length;
1679
1680         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1681                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1682                 first_buf->vlan_tci = mbuf->vlan_tci;
1683         }
1684
1685         avp_dev_buffer_sanity_check(avp, buffers[0]);
1686
1687         return total_length;
1688 }
1689
1690
1691 static uint16_t
1692 avp_xmit_scattered_pkts(void *tx_queue,
1693                         struct rte_mbuf **tx_pkts,
1694                         uint16_t nb_pkts)
1695 {
1696         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1697                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1698         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1699         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1700         struct avp_dev *avp = txq->avp;
1701         struct rte_avp_fifo *alloc_q;
1702         struct rte_avp_fifo *tx_q;
1703         unsigned int count, avail, n;
1704         unsigned int orig_nb_pkts;
1705         struct rte_mbuf *m;
1706         unsigned int required;
1707         unsigned int segments;
1708         unsigned int tx_bytes;
1709         unsigned int i;
1710
1711         orig_nb_pkts = nb_pkts;
1712         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1713                 /* VM live migration in progress */
1714                 /* TODO ... buffer for X packets then drop? */
1715                 txq->errors += nb_pkts;
1716                 return 0;
1717         }
1718
1719         tx_q = avp->tx_q[txq->queue_id];
1720         alloc_q = avp->alloc_q[txq->queue_id];
1721
1722         /* limit the number of transmitted packets to the max burst size */
1723         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1724                 nb_pkts = AVP_MAX_TX_BURST;
1725
1726         /* determine how many buffers are available to copy into */
1727         avail = avp_fifo_count(alloc_q);
1728         if (unlikely(avail > (AVP_MAX_TX_BURST *
1729                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1730                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1731
1732         /* determine how many slots are available in the transmit queue */
1733         count = avp_fifo_free_count(tx_q);
1734
1735         /* determine how many packets can be sent */
1736         nb_pkts = RTE_MIN(count, nb_pkts);
1737
1738         /* determine how many packets will fit in the available buffers */
1739         count = 0;
1740         segments = 0;
1741         for (i = 0; i < nb_pkts; i++) {
1742                 m = tx_pkts[i];
1743                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1744                         /* prefetch next entry while processing this one */
1745                         rte_prefetch0(tx_pkts[i + 1]);
1746                 }
1747                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1748                         avp->host_mbuf_size;
1749
1750                 if (unlikely((required == 0) ||
1751                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1752                         break;
1753                 else if (unlikely(required + segments > avail))
1754                         break;
1755                 segments += required;
1756                 count++;
1757         }
1758         nb_pkts = count;
1759
1760         if (unlikely(nb_pkts == 0)) {
1761                 /* no available buffers, or no space on the tx queue */
1762                 txq->errors += orig_nb_pkts;
1763                 return 0;
1764         }
1765
1766         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1767                    nb_pkts, tx_q);
1768
1769         /* retrieve sufficient send buffers */
1770         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1771         if (unlikely(n != segments)) {
1772                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1773                            "n=%u, segments=%u, orig=%u\n",
1774                            n, segments, orig_nb_pkts);
1775                 txq->errors += orig_nb_pkts;
1776                 return 0;
1777         }
1778
1779         tx_bytes = 0;
1780         count = 0;
1781         for (i = 0; i < nb_pkts; i++) {
1782                 /* process each packet to be transmitted */
1783                 m = tx_pkts[i];
1784
1785                 /* determine how many buffers are required for this packet */
1786                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1787                         avp->host_mbuf_size;
1788
1789                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1790                                                     &avp_bufs[count], required);
1791                 tx_bufs[i] = avp_bufs[count];
1792                 count += required;
1793
1794                 /* free the original mbuf */
1795                 rte_pktmbuf_free(m);
1796         }
1797
1798         txq->packets += nb_pkts;
1799         txq->bytes += tx_bytes;
1800
1801 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1802         for (i = 0; i < nb_pkts; i++)
1803                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1804 #endif
1805
1806         /* send the packets */
1807         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1808         if (unlikely(n != orig_nb_pkts))
1809                 txq->errors += (orig_nb_pkts - n);
1810
1811         return n;
1812 }
1813
1814
1815 static uint16_t
1816 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1817 {
1818         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1819         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1820         struct avp_dev *avp = txq->avp;
1821         struct rte_avp_desc *pkt_buf;
1822         struct rte_avp_fifo *alloc_q;
1823         struct rte_avp_fifo *tx_q;
1824         unsigned int count, avail, n;
1825         struct rte_mbuf *m;
1826         unsigned int pkt_len;
1827         unsigned int tx_bytes;
1828         char *pkt_data;
1829         unsigned int i;
1830
1831         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1832                 /* VM live migration in progress */
1833                 /* TODO ... buffer for X packets then drop?! */
1834                 txq->errors++;
1835                 return 0;
1836         }
1837
1838         tx_q = avp->tx_q[txq->queue_id];
1839         alloc_q = avp->alloc_q[txq->queue_id];
1840
1841         /* limit the number of transmitted packets to the max burst size */
1842         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1843                 nb_pkts = AVP_MAX_TX_BURST;
1844
1845         /* determine how many buffers are available to copy into */
1846         avail = avp_fifo_count(alloc_q);
1847
1848         /* determine how many slots are available in the transmit queue */
1849         count = avp_fifo_free_count(tx_q);
1850
1851         /* determine how many packets can be sent */
1852         count = RTE_MIN(count, avail);
1853         count = RTE_MIN(count, nb_pkts);
1854
1855         if (unlikely(count == 0)) {
1856                 /* no available buffers, or no space on the tx queue */
1857                 txq->errors += nb_pkts;
1858                 return 0;
1859         }
1860
1861         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1862                    count, tx_q);
1863
1864         /* retrieve sufficient send buffers */
1865         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1866         if (unlikely(n != count)) {
1867                 txq->errors++;
1868                 return 0;
1869         }
1870
1871         tx_bytes = 0;
1872         for (i = 0; i < count; i++) {
1873                 /* prefetch next entry while processing the current one */
1874                 if (i < count - 1) {
1875                         pkt_buf = avp_dev_translate_buffer(avp,
1876                                                            avp_bufs[i + 1]);
1877                         rte_prefetch0(pkt_buf);
1878                 }
1879
1880                 /* process each packet to be transmitted */
1881                 m = tx_pkts[i];
1882
1883                 /* Adjust pointers for guest addressing */
1884                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1885                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1886                 pkt_len = rte_pktmbuf_pkt_len(m);
1887
1888                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1889                                          (pkt_len > avp->host_mbuf_size))) {
1890                         /*
1891                          * application should be using the scattered transmit
1892                          * function; send it truncated to avoid the performance
1893                          * hit of having to manage returning the already
1894                          * allocated buffer to the free list.  This should not
1895                          * happen since the application should have set the
1896                          * max_rx_pkt_len based on its MTU and it should be
1897                          * policing its own packet sizes.
1898                          */
1899                         txq->errors++;
1900                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1901                                           avp->host_mbuf_size);
1902                 }
1903
1904                 /* copy data out of our mbuf and into the AVP buffer */
1905                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1906                 pkt_buf->pkt_len = pkt_len;
1907                 pkt_buf->data_len = pkt_len;
1908                 pkt_buf->nb_segs = 1;
1909                 pkt_buf->next = NULL;
1910
1911                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1912                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1913                         pkt_buf->vlan_tci = m->vlan_tci;
1914                 }
1915
1916                 tx_bytes += pkt_len;
1917
1918                 /* free the original mbuf */
1919                 rte_pktmbuf_free(m);
1920         }
1921
1922         txq->packets += count;
1923         txq->bytes += tx_bytes;
1924
1925         /* send the packets */
1926         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1927
1928         return n;
1929 }
1930
1931 static void
1932 avp_dev_rx_queue_release(void *rx_queue)
1933 {
1934         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1935         struct avp_dev *avp = rxq->avp;
1936         struct rte_eth_dev_data *data = avp->dev_data;
1937         unsigned int i;
1938
1939         for (i = 0; i < avp->num_rx_queues; i++) {
1940                 if (data->rx_queues[i] == rxq) {
1941                         rte_free(data->rx_queues[i]);
1942                         data->rx_queues[i] = NULL;
1943                 }
1944         }
1945 }
1946
1947 static void
1948 avp_dev_rx_queue_release_all(struct rte_eth_dev *eth_dev)
1949 {
1950         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1951         struct rte_eth_dev_data *data = avp->dev_data;
1952         unsigned int i;
1953
1954         for (i = 0; i < avp->num_rx_queues; i++) {
1955                 if (data->rx_queues[i]) {
1956                         rte_free(data->rx_queues[i]);
1957                         data->rx_queues[i] = NULL;
1958                 }
1959         }
1960 }
1961
1962 static void
1963 avp_dev_tx_queue_release(void *tx_queue)
1964 {
1965         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1966         struct avp_dev *avp = txq->avp;
1967         struct rte_eth_dev_data *data = avp->dev_data;
1968         unsigned int i;
1969
1970         for (i = 0; i < avp->num_tx_queues; i++) {
1971                 if (data->tx_queues[i] == txq) {
1972                         rte_free(data->tx_queues[i]);
1973                         data->tx_queues[i] = NULL;
1974                 }
1975         }
1976 }
1977
1978 static void
1979 avp_dev_tx_queue_release_all(struct rte_eth_dev *eth_dev)
1980 {
1981         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1982         struct rte_eth_dev_data *data = avp->dev_data;
1983         unsigned int i;
1984
1985         for (i = 0; i < avp->num_tx_queues; i++) {
1986                 if (data->tx_queues[i]) {
1987                         rte_free(data->tx_queues[i]);
1988                         data->tx_queues[i] = NULL;
1989                 }
1990         }
1991 }
1992
1993 static int
1994 avp_dev_configure(struct rte_eth_dev *eth_dev)
1995 {
1996         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1997         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1998         struct rte_avp_device_info *host_info;
1999         struct rte_avp_device_config config;
2000         int mask = 0;
2001         void *addr;
2002         int ret;
2003
2004         rte_spinlock_lock(&avp->lock);
2005         if (avp->flags & AVP_F_DETACHED) {
2006                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2007                 ret = -ENOTSUP;
2008                 goto unlock;
2009         }
2010
2011         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2012         host_info = (struct rte_avp_device_info *)addr;
2013
2014         /* Setup required number of queues */
2015         _avp_set_queue_counts(eth_dev);
2016
2017         mask = (ETH_VLAN_STRIP_MASK |
2018                 ETH_VLAN_FILTER_MASK |
2019                 ETH_VLAN_EXTEND_MASK);
2020         ret = avp_vlan_offload_set(eth_dev, mask);
2021         if (ret < 0) {
2022                 PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2023                             ret);
2024                 goto unlock;
2025         }
2026
2027         /* update device config */
2028         memset(&config, 0, sizeof(config));
2029         config.device_id = host_info->device_id;
2030         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2031         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2032         config.features = avp->features;
2033         config.num_tx_queues = avp->num_tx_queues;
2034         config.num_rx_queues = avp->num_rx_queues;
2035
2036         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2037         if (ret < 0) {
2038                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2039                             ret);
2040                 goto unlock;
2041         }
2042
2043         avp->flags |= AVP_F_CONFIGURED;
2044         ret = 0;
2045
2046 unlock:
2047         rte_spinlock_unlock(&avp->lock);
2048         return ret;
2049 }
2050
2051 static int
2052 avp_dev_start(struct rte_eth_dev *eth_dev)
2053 {
2054         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2055         int ret;
2056
2057         rte_spinlock_lock(&avp->lock);
2058         if (avp->flags & AVP_F_DETACHED) {
2059                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2060                 ret = -ENOTSUP;
2061                 goto unlock;
2062         }
2063
2064         /* update link state */
2065         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2066         if (ret < 0) {
2067                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2068                             ret);
2069                 goto unlock;
2070         }
2071
2072         /* remember current link state */
2073         avp->flags |= AVP_F_LINKUP;
2074
2075         ret = 0;
2076
2077 unlock:
2078         rte_spinlock_unlock(&avp->lock);
2079         return ret;
2080 }
2081
2082 static void
2083 avp_dev_stop(struct rte_eth_dev *eth_dev)
2084 {
2085         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2086         int ret;
2087
2088         rte_spinlock_lock(&avp->lock);
2089         if (avp->flags & AVP_F_DETACHED) {
2090                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2091                 goto unlock;
2092         }
2093
2094         /* remember current link state */
2095         avp->flags &= ~AVP_F_LINKUP;
2096
2097         /* update link state */
2098         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2099         if (ret < 0) {
2100                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2101                             ret);
2102         }
2103
2104 unlock:
2105         rte_spinlock_unlock(&avp->lock);
2106 }
2107
2108 static void
2109 avp_dev_close(struct rte_eth_dev *eth_dev)
2110 {
2111         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2112         int ret;
2113
2114         rte_spinlock_lock(&avp->lock);
2115         if (avp->flags & AVP_F_DETACHED) {
2116                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2117                 goto unlock;
2118         }
2119
2120         /* remember current link state */
2121         avp->flags &= ~AVP_F_LINKUP;
2122         avp->flags &= ~AVP_F_CONFIGURED;
2123
2124         ret = avp_dev_disable_interrupts(eth_dev);
2125         if (ret < 0) {
2126                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2127                 /* continue */
2128         }
2129
2130         /* update device state */
2131         ret = avp_dev_ctrl_shutdown(eth_dev);
2132         if (ret < 0) {
2133                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2134                             ret);
2135                 /* continue */
2136         }
2137
2138         /* release dynamic storage for rx/tx queues */
2139         avp_dev_rx_queue_release_all(eth_dev);
2140         avp_dev_tx_queue_release_all(eth_dev);
2141
2142 unlock:
2143         rte_spinlock_unlock(&avp->lock);
2144 }
2145
2146 static int
2147 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2148                                         __rte_unused int wait_to_complete)
2149 {
2150         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2151         struct rte_eth_link *link = &eth_dev->data->dev_link;
2152
2153         link->link_speed = ETH_SPEED_NUM_10G;
2154         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2155         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2156
2157         return -1;
2158 }
2159
2160 static int
2161 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2162 {
2163         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2164
2165         rte_spinlock_lock(&avp->lock);
2166         if ((avp->flags & AVP_F_PROMISC) == 0) {
2167                 avp->flags |= AVP_F_PROMISC;
2168                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2169                             eth_dev->data->port_id);
2170         }
2171         rte_spinlock_unlock(&avp->lock);
2172
2173         return 0;
2174 }
2175
2176 static int
2177 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2178 {
2179         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2180
2181         rte_spinlock_lock(&avp->lock);
2182         if ((avp->flags & AVP_F_PROMISC) != 0) {
2183                 avp->flags &= ~AVP_F_PROMISC;
2184                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2185                             eth_dev->data->port_id);
2186         }
2187         rte_spinlock_unlock(&avp->lock);
2188
2189         return 0;
2190 }
2191
2192 static int
2193 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2194                  struct rte_eth_dev_info *dev_info)
2195 {
2196         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2197
2198         dev_info->max_rx_queues = avp->max_rx_queues;
2199         dev_info->max_tx_queues = avp->max_tx_queues;
2200         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2201         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2202         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2203         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2204                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2205                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2206         }
2207
2208         return 0;
2209 }
2210
2211 static int
2212 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2213 {
2214         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2215         struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
2216         uint64_t offloads = dev_conf->rxmode.offloads;
2217
2218         if (mask & ETH_VLAN_STRIP_MASK) {
2219                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2220                         if (offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2221                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2222                         else
2223                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2224                 } else {
2225                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2226                 }
2227         }
2228
2229         if (mask & ETH_VLAN_FILTER_MASK) {
2230                 if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2231                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2232         }
2233
2234         if (mask & ETH_VLAN_EXTEND_MASK) {
2235                 if (offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2236                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2237         }
2238
2239         return 0;
2240 }
2241
2242 static int
2243 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2244 {
2245         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2246         unsigned int i;
2247
2248         for (i = 0; i < avp->num_rx_queues; i++) {
2249                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2250
2251                 if (rxq) {
2252                         stats->ipackets += rxq->packets;
2253                         stats->ibytes += rxq->bytes;
2254                         stats->ierrors += rxq->errors;
2255
2256                         stats->q_ipackets[i] += rxq->packets;
2257                         stats->q_ibytes[i] += rxq->bytes;
2258                         stats->q_errors[i] += rxq->errors;
2259                 }
2260         }
2261
2262         for (i = 0; i < avp->num_tx_queues; i++) {
2263                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2264
2265                 if (txq) {
2266                         stats->opackets += txq->packets;
2267                         stats->obytes += txq->bytes;
2268                         stats->oerrors += txq->errors;
2269
2270                         stats->q_opackets[i] += txq->packets;
2271                         stats->q_obytes[i] += txq->bytes;
2272                 }
2273         }
2274
2275         return 0;
2276 }
2277
2278 static int
2279 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2280 {
2281         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2282         unsigned int i;
2283
2284         for (i = 0; i < avp->num_rx_queues; i++) {
2285                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2286
2287                 if (rxq) {
2288                         rxq->bytes = 0;
2289                         rxq->packets = 0;
2290                         rxq->errors = 0;
2291                 }
2292         }
2293
2294         for (i = 0; i < avp->num_tx_queues; i++) {
2295                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2296
2297                 if (txq) {
2298                         txq->bytes = 0;
2299                         txq->packets = 0;
2300                         txq->errors = 0;
2301                 }
2302         }
2303
2304         return 0;
2305 }
2306
2307 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2308 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
2309
2310 RTE_INIT(avp_init_log)
2311 {
2312         avp_logtype_driver = rte_log_register("pmd.net.avp.driver");
2313         if (avp_logtype_driver >= 0)
2314                 rte_log_set_level(avp_logtype_driver, RTE_LOG_NOTICE);
2315 }