drivers/net: remove unnecessary macro for unused variables
dpdk.git: drivers/net/avp/avp_ethdev.c
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_memzone.h>
44 #include <rte_malloc.h>
45 #include <rte_atomic.h>
46 #include <rte_branch_prediction.h>
47 #include <rte_pci.h>
48 #include <rte_ether.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
51 #include <rte_spinlock.h>
52 #include <rte_byteorder.h>
53 #include <rte_dev.h>
54 #include <rte_memory.h>
55 #include <rte_eal.h>
56 #include <rte_io.h>
57
58 #include "rte_avp_common.h"
59 #include "rte_avp_fifo.h"
60
61 #include "avp_logs.h"
62
63
64 static int avp_dev_create(struct rte_pci_device *pci_dev,
65                           struct rte_eth_dev *eth_dev);
66
67 static int avp_dev_configure(struct rte_eth_dev *dev);
68 static int avp_dev_start(struct rte_eth_dev *dev);
69 static void avp_dev_stop(struct rte_eth_dev *dev);
70 static void avp_dev_close(struct rte_eth_dev *dev);
71 static void avp_dev_info_get(struct rte_eth_dev *dev,
72                              struct rte_eth_dev_info *dev_info);
73 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
74 static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79                                   uint16_t rx_queue_id,
80                                   uint16_t nb_rx_desc,
81                                   unsigned int socket_id,
82                                   const struct rte_eth_rxconf *rx_conf,
83                                   struct rte_mempool *pool);
84
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86                                   uint16_t tx_queue_id,
87                                   uint16_t nb_tx_desc,
88                                   unsigned int socket_id,
89                                   const struct rte_eth_txconf *tx_conf);
90
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92                                         struct rte_mbuf **rx_pkts,
93                                         uint16_t nb_pkts);
94
95 static uint16_t avp_recv_pkts(void *rx_queue,
96                               struct rte_mbuf **rx_pkts,
97                               uint16_t nb_pkts);
98
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100                                         struct rte_mbuf **tx_pkts,
101                                         uint16_t nb_pkts);
102
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104                               struct rte_mbuf **tx_pkts,
105                               uint16_t nb_pkts);
106
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109
110 static void avp_dev_stats_get(struct rte_eth_dev *dev,
111                               struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113
114
115 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
116
117
118 #define AVP_MAX_RX_BURST 64
119 #define AVP_MAX_TX_BURST 64
120 #define AVP_MAX_MAC_ADDRS 1
121 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
122
123
124 /*
125  * Defines the number of microseconds to wait before checking the response
126  * queue for completion.
127  */
128 #define AVP_REQUEST_DELAY_USECS (5000)
129
130 /*
131  * Defines the number of times to check the response queue for completion before
132  * declaring a timeout.
133  */
134 #define AVP_MAX_REQUEST_RETRY (100)
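/*
 * Together these bound the worst-case wait for a host response at roughly
 * AVP_MAX_REQUEST_RETRY * AVP_REQUEST_DELAY_USECS = 100 * 5000us = 500ms
 * per control request (see avp_dev_process_request()).
 */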
135
136 /* Defines the current PCI driver version number */
137 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
138
139 /*
140  * The set of PCI devices this driver supports
141  */
142 static const struct rte_pci_id pci_id_avp_map[] = {
143         { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
144           .device_id = RTE_AVP_PCI_DEVICE_ID,
145           .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
146           .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
147           .class_id = RTE_CLASS_ANY_ID,
148         },
149
150         { .vendor_id = 0, /* sentinel */
151         },
152 };
153
154 /*
155  * dev_ops for avp, bare necessities for basic operation
156  */
157 static const struct eth_dev_ops avp_eth_dev_ops = {
158         .dev_configure       = avp_dev_configure,
159         .dev_start           = avp_dev_start,
160         .dev_stop            = avp_dev_stop,
161         .dev_close           = avp_dev_close,
162         .dev_infos_get       = avp_dev_info_get,
163         .vlan_offload_set    = avp_vlan_offload_set,
164         .stats_get           = avp_dev_stats_get,
165         .stats_reset         = avp_dev_stats_reset,
166         .link_update         = avp_dev_link_update,
167         .promiscuous_enable  = avp_dev_promiscuous_enable,
168         .promiscuous_disable = avp_dev_promiscuous_disable,
169         .rx_queue_setup      = avp_dev_rx_queue_setup,
170         .rx_queue_release    = avp_dev_rx_queue_release,
171         .tx_queue_setup      = avp_dev_tx_queue_setup,
172         .tx_queue_release    = avp_dev_tx_queue_release,
173 };
174
175 /**@{ AVP device flags */
176 #define AVP_F_PROMISC (1 << 1)
177 #define AVP_F_CONFIGURED (1 << 2)
178 #define AVP_F_LINKUP (1 << 3)
179 #define AVP_F_DETACHED (1 << 4)
180 /**@} */
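/*
 * AVP_F_DETACHED is set while the device is detached from the host during a
 * VM live migration; the receive burst handlers return early while it is set
 * (see avp_recv_scattered_pkts()).  AVP_F_PROMISC mirrors the promiscuous
 * setting and is consulted by the MAC filter; AVP_F_LINKUP and
 * AVP_F_CONFIGURED track the link and configuration state that is reported
 * back to the host on re-attach.
 */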
181
182 /* Ethernet device validation marker */
183 #define AVP_ETHDEV_MAGIC 0x92972862
184
185 /*
186  * Defines the AVP device attributes which are attached to an RTE ethernet
187  * device
188  */
189 struct avp_dev {
190         uint32_t magic; /**< Memory validation marker */
191         uint64_t device_id; /**< Unique system identifier */
192         struct ether_addr ethaddr; /**< Host specified MAC address */
193         struct rte_eth_dev_data *dev_data;
194         /**< Back pointer to ethernet device data */
195         volatile uint32_t flags; /**< Device operational flags */
196         uint8_t port_id; /**< Ethernet port identifier */
197         struct rte_mempool *pool; /**< pkt mbuf mempool */
198         unsigned int guest_mbuf_size; /**< local pool mbuf size */
199         unsigned int host_mbuf_size; /**< host mbuf size */
200         unsigned int max_rx_pkt_len; /**< maximum receive unit */
201         uint32_t host_features; /**< Supported feature bitmap */
202         uint32_t features; /**< Enabled feature bitmap */
203         unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
204         unsigned int max_tx_queues; /**< Maximum number of transmit queues */
205         unsigned int num_rx_queues; /**< Negotiated number of receive queues */
206         unsigned int max_rx_queues; /**< Maximum number of receive queues */
207
208         struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
209         struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
210         struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
211         /**< Allocated mbufs queue */
212         struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
213         /**< To be freed mbufs queue */
214
215         /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
216         rte_spinlock_t lock;
217
218         /* For request & response */
219         struct rte_avp_fifo *req_q; /**< Request queue */
220         struct rte_avp_fifo *resp_q; /**< Response queue */
221         void *host_sync_addr; /**< (host) Req/Resp Mem address */
222         void *sync_addr; /**< Req/Resp Mem address */
223         void *host_mbuf_addr; /**< (host) MBUF pool start address */
224         void *mbuf_addr; /**< MBUF pool start address */
225 } __rte_cache_aligned;
226
227 /* RTE ethernet private data */
228 struct avp_adapter {
229         struct avp_dev avp;
230 } __rte_cache_aligned;
231
232
233 /* 32-bit MMIO register write */
234 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
235
236 /* 32-bit MMIO register read */
237 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
238
239 /* Macro to cast the ethernet device private data to an AVP object */
240 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
241         (&((struct avp_adapter *)adapter)->avp)
242
243 /*
244  * Defines the structure of an AVP device queue for the purpose of handling the
245  * receive and transmit burst callback functions
246  */
247 struct avp_queue {
248         struct rte_eth_dev_data *dev_data;
249         /**< Back pointer to ethernet device data */
250         struct avp_dev *avp; /**< Back pointer to AVP device */
251         uint16_t queue_id;
252         /**< Queue identifier used for indexing current queue */
253         uint16_t queue_base;
254         /**< Base queue identifier for queue servicing */
255         uint16_t queue_limit;
256         /**< Maximum queue identifier for queue servicing */
257
258         uint64_t packets;
259         uint64_t bytes;
260         uint64_t errors;
261 };
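/*
 * Each device queue services the AVP fifos in [queue_base, queue_limit];
 * queue_id records the next fifo to poll and wraps back to queue_base once
 * queue_limit has been serviced (see the receive burst handlers).
 */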
262
263 /* send a request and wait for a response
264  *
265  * @warning must be called while holding the avp->lock spinlock.
266  */
267 static int
268 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
269 {
270         unsigned int retry = AVP_MAX_REQUEST_RETRY;
271         void *resp_addr = NULL;
272         unsigned int count;
273         int ret;
274
275         PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
276
277         request->result = -ENOTSUP;
278
279         /* Discard any stale responses before starting a new request */
280         while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
281                 PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
282
283         rte_memcpy(avp->sync_addr, request, sizeof(*request));
284         count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
285         if (count < 1) {
286                 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
287                             request->req_id);
288                 ret = -EBUSY;
289                 goto done;
290         }
291
292         while (retry--) {
293                 /* wait for a response */
294                 usleep(AVP_REQUEST_DELAY_USECS);
295
296                 count = avp_fifo_count(avp->resp_q);
297                 if (count >= 1) {
298                         /* response received */
299                         break;
300                 }
301
302                 if ((count < 1) && (retry == 0)) {
303                         PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
304                                     request->req_id);
305                         ret = -ETIME;
306                         goto done;
307                 }
308         }
309
310         /* retrieve the response */
311         count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
312         if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
313                 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
314                             count, resp_addr, avp->host_sync_addr);
315                 ret = -ENODATA;
316                 goto done;
317         }
318
319         /* copy to user buffer */
320         rte_memcpy(request, avp->sync_addr, sizeof(*request));
321         ret = 0;
322
323         PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
324                     request->result, request->req_id);
325
326 done:
327         return ret;
328 }
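/*
 * Note for callers: two error paths must be checked.  The return value of
 * avp_dev_process_request() covers transport failures (the request could not
 * be queued or no response arrived), while request->result carries the status
 * reported by the host; the control wrappers below fold both into a single
 * return value.
 */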
329
330 static int
331 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
332 {
333         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
334         struct rte_avp_request request;
335         int ret;
336
337         /* setup a link state change request */
338         memset(&request, 0, sizeof(request));
339         request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
340         request.if_up = state;
341
342         ret = avp_dev_process_request(avp, &request);
343
344         return ret == 0 ? request.result : ret;
345 }
346
347 static int
348 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
349                         struct rte_avp_device_config *config)
350 {
351         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
352         struct rte_avp_request request;
353         int ret;
354
355         /* setup a configure request */
356         memset(&request, 0, sizeof(request));
357         request.req_id = RTE_AVP_REQ_CFG_DEVICE;
358         memcpy(&request.config, config, sizeof(request.config));
359
360         ret = avp_dev_process_request(avp, &request);
361
362         return ret == 0 ? request.result : ret;
363 }
364
365 static int
366 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
367 {
368         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
369         struct rte_avp_request request;
370         int ret;
371
372         /* setup a shutdown request */
373         memset(&request, 0, sizeof(request));
374         request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
375
376         ret = avp_dev_process_request(avp, &request);
377
378         return ret == 0 ? request.result : ret;
379 }
380
381 /* translate from host mbuf virtual address to guest virtual address */
382 static inline void *
383 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
384 {
385         return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
386                                        (uintptr_t)avp->host_mbuf_addr),
387                            (uintptr_t)avp->mbuf_addr);
388 }
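/*
 * Example for avp_dev_translate_buffer() (addresses are illustrative only):
 * if the host mapped the shared mbuf pool at host VA 0x7f0000000000 and the
 * guest mapping of the same pool starts at 0x400000000, a host buffer at
 * 0x7f0000001000 translates to 0x400000000 + 0x1000 = 0x400001000.  The
 * translation is a constant offset and is only meaningful for addresses that
 * fall inside the shared pool.
 */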
389
390 /* translate from host physical address to guest virtual address */
391 static void *
392 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
393                           phys_addr_t host_phys_addr)
394 {
395         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
396         struct rte_mem_resource *resource;
397         struct rte_avp_memmap_info *info;
398         struct rte_avp_memmap *map;
399         off_t offset;
400         void *addr;
401         unsigned int i;
402
403         addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
404         resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
405         info = (struct rte_avp_memmap_info *)resource->addr;
406
407         offset = 0;
408         for (i = 0; i < info->nb_maps; i++) {
409                 /* search all segments looking for a matching address */
410                 map = &info->maps[i];
411
412                 if ((host_phys_addr >= map->phys_addr) &&
413                         (host_phys_addr < (map->phys_addr + map->length))) {
414                         /* address is within this segment */
415                         offset += (host_phys_addr - map->phys_addr);
416                         addr = RTE_PTR_ADD(addr, offset);
417
418                         PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
419                                     host_phys_addr, addr);
420
421                         return addr;
422                 }
423                 offset += map->length;
424         }
425
426         return NULL;
427 }
428
429 /* verify that the incoming device version is compatible with our version */
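/*
 * Returns 0 when the (minor-stripped) device version is less than or equal to
 * the driver version; callers treat any non-zero return as an incompatible
 * host.
 */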
430 static int
431 avp_dev_version_check(uint32_t version)
432 {
433         uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
434         uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
435
436         if (device <= driver) {
437                 /* the host driver version is less than or equal to ours */
438                 return 0;
439         }
440
441         return 1;
442 }
443
444 /* verify that memory regions have expected version and validation markers */
445 static int
446 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
447 {
448         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
449         struct rte_avp_memmap_info *memmap;
450         struct rte_avp_device_info *info;
451         struct rte_mem_resource *resource;
452         unsigned int i;
453
454         /* Dump resource info for debug */
455         for (i = 0; i < PCI_MAX_RESOURCE; i++) {
456                 resource = &pci_dev->mem_resource[i];
457                 if ((resource->phys_addr == 0) || (resource->len == 0))
458                         continue;
459
460                 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
461                             i, resource->phys_addr,
462                             resource->len, resource->addr);
463
464                 switch (i) {
465                 case RTE_AVP_PCI_MEMMAP_BAR:
466                         memmap = (struct rte_avp_memmap_info *)resource->addr;
467                         if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
468                             (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
469                                 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
470                                             memmap->magic, memmap->version);
471                                 return -EINVAL;
472                         }
473                         break;
474
475                 case RTE_AVP_PCI_DEVICE_BAR:
476                         info = (struct rte_avp_device_info *)resource->addr;
477                         if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
478                             avp_dev_version_check(info->version)) {
479                                 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
480                                             info->magic, info->version,
481                                             AVP_DPDK_DRIVER_VERSION);
482                                 return -EINVAL;
483                         }
484                         break;
485
486                 case RTE_AVP_PCI_MEMORY_BAR:
487                 case RTE_AVP_PCI_MMIO_BAR:
488                         if (resource->addr == NULL) {
489                                 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
490                                             i);
491                                 return -EINVAL;
492                         }
493                         break;
494
495                 case RTE_AVP_PCI_MSIX_BAR:
496                 default:
497                         /* no validation required */
498                         break;
499                 }
500         }
501
502         return 0;
503 }
504
505 static int
506 avp_dev_detach(struct rte_eth_dev *eth_dev)
507 {
508         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
509         int ret;
510
511         PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
512                     eth_dev->data->port_id, avp->device_id);
513
514         rte_spinlock_lock(&avp->lock);
515
516         if (avp->flags & AVP_F_DETACHED) {
517                 PMD_DRV_LOG(NOTICE, "port %u already detached\n",
518                             eth_dev->data->port_id);
519                 ret = 0;
520                 goto unlock;
521         }
522
523         /* shutdown the device first so the host stops sending us packets. */
524         ret = avp_dev_ctrl_shutdown(eth_dev);
525         if (ret < 0) {
526                 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
527                             ret);
528                 avp->flags &= ~AVP_F_DETACHED;
529                 goto unlock;
530         }
531
532         avp->flags |= AVP_F_DETACHED;
533         rte_wmb();
534
535         /* wait for queues to acknowledge the presence of the detach flag */
536         rte_delay_ms(1);
537
538         ret = 0;
539
540 unlock:
541         rte_spinlock_unlock(&avp->lock);
542         return ret;
543 }
544
545 static void
546 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
547 {
548         struct avp_dev *avp =
549                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
550         struct avp_queue *rxq;
551         uint16_t queue_count;
552         uint16_t remainder;
553
554         rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
555
556         /*
557          * Must map all AVP fifos as evenly as possible between the configured
558          * device queues.  Each device queue will service a subset of the AVP
559          * fifos.  If the AVP fifos do not divide evenly across the device queues
560          * then the first set of device queues will each service one extra fifo.
561          */
562         queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
563         remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
564         if (rx_queue_id < remainder) {
565                 /* these queues must service one extra FIFO */
566                 rxq->queue_base = rx_queue_id * (queue_count + 1);
567                 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
568         } else {
569                 /* these queues service the regular number of FIFOs */
570                 rxq->queue_base = ((remainder * (queue_count + 1)) +
571                                    ((rx_queue_id - remainder) * queue_count));
572                 rxq->queue_limit = rxq->queue_base + queue_count - 1;
573         }
574
575         PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
576                     rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
577
578         rxq->queue_id = rxq->queue_base;
579 }
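/*
 * Worked example (illustrative values): with 5 AVP fifos and 2 configured
 * device queues, queue_count = 2 and remainder = 1, so device queue 0
 * services fifos 0..2 and device queue 1 services fifos 3..4.
 */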
580
581 static void
582 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
583 {
584         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
585         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
586         struct rte_avp_device_info *host_info;
587         void *addr;
588
589         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
590         host_info = (struct rte_avp_device_info *)addr;
591
592         /*
593          * the transmit direction is not negotiated beyond respecting the max
594          * number of queues because the host can handle arbitrary guest tx
595          * queues (host rx queues).
596          */
597         avp->num_tx_queues = eth_dev->data->nb_tx_queues;
598
599         /*
600          * the receive direction is more restrictive.  The host requires a
601          * minimum number of guest rx queues (host tx queues) therefore
602          * negotiate a value that is at least as large as the host minimum
603          * requirement.  If the host and guest values are not identical then a
604          * mapping will be established in the receive_queue_setup function.
605          */
606         avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
607                                      eth_dev->data->nb_rx_queues);
608
609         PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
610                     avp->num_tx_queues, avp->num_rx_queues);
611 }
612
613 static int
614 avp_dev_attach(struct rte_eth_dev *eth_dev)
615 {
616         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
617         struct rte_avp_device_config config;
618         unsigned int i;
619         int ret;
620
621         PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
622                     eth_dev->data->port_id, avp->device_id);
623
624         rte_spinlock_lock(&avp->lock);
625
626         if (!(avp->flags & AVP_F_DETACHED)) {
627                 PMD_DRV_LOG(NOTICE, "port %u already attached\n",
628                             eth_dev->data->port_id);
629                 ret = 0;
630                 goto unlock;
631         }
632
633         /*
634          * make sure that the detached flag is set prior to reconfiguring the
635          * queues.
636          */
637         avp->flags |= AVP_F_DETACHED;
638         rte_wmb();
639
640         /*
641          * re-run the device create utility which will parse the new host info
642          * and setup the AVP device queue pointers.
643          */
644         ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
645         if (ret < 0) {
646                 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
647                             ret);
648                 goto unlock;
649         }
650
651         if (avp->flags & AVP_F_CONFIGURED) {
652                 /*
653                  * Update the receive queue mapping to handle cases where the
654                  * source and destination hosts have different queue
655                  * requirements.  As long as the DETACHED flag is asserted the
656                  * queue table should not be referenced so it should be safe to
657                  * update it.
658                  */
659                 _avp_set_queue_counts(eth_dev);
660                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
661                         _avp_set_rx_queue_mappings(eth_dev, i);
662
663                 /*
664                  * Update the host with our config details so that it knows the
665                  * device is active.
666                  */
667                 memset(&config, 0, sizeof(config));
668                 config.device_id = avp->device_id;
669                 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
670                 config.driver_version = AVP_DPDK_DRIVER_VERSION;
671                 config.features = avp->features;
672                 config.num_tx_queues = avp->num_tx_queues;
673                 config.num_rx_queues = avp->num_rx_queues;
674                 config.if_up = !!(avp->flags & AVP_F_LINKUP);
675
676                 ret = avp_dev_ctrl_set_config(eth_dev, &config);
677                 if (ret < 0) {
678                         PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
679                                     ret);
680                         goto unlock;
681                 }
682         }
683
684         rte_wmb();
685         avp->flags &= ~AVP_F_DETACHED;
686
687         ret = 0;
688
689 unlock:
690         rte_spinlock_unlock(&avp->lock);
691         return ret;
692 }
693
694 static void
695 avp_dev_interrupt_handler(void *data)
696 {
697         struct rte_eth_dev *eth_dev = data;
698         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
699         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
700         uint32_t status, value;
701         int ret;
702
703         if (registers == NULL)
704                 rte_panic("no mapped MMIO register space\n");
705
706         /* read the interrupt status register
707          * note: this register clears on read so all raised interrupts must be
708          *    handled or remembered for later processing
709          */
710         status = AVP_READ32(
711                 RTE_PTR_ADD(registers,
712                             RTE_AVP_INTERRUPT_STATUS_OFFSET));
713
714         if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
715                 /* handle interrupt based on current status */
716                 value = AVP_READ32(
717                         RTE_PTR_ADD(registers,
718                                     RTE_AVP_MIGRATION_STATUS_OFFSET));
719                 switch (value) {
720                 case RTE_AVP_MIGRATION_DETACHED:
721                         ret = avp_dev_detach(eth_dev);
722                         break;
723                 case RTE_AVP_MIGRATION_ATTACHED:
724                         ret = avp_dev_attach(eth_dev);
725                         break;
726                 default:
727                         PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
728                                     value);
729                         ret = -EINVAL;
730                 }
731
732                 /* acknowledge the request by writing out our current status */
733                 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
734                 AVP_WRITE32(value,
735                             RTE_PTR_ADD(registers,
736                                         RTE_AVP_MIGRATION_ACK_OFFSET));
737
738                 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
739         }
740
741         if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
742                 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
743                             status);
744
745         /* re-enable UIO interrupt handling */
746         ret = rte_intr_enable(&pci_dev->intr_handle);
747         if (ret < 0) {
748                 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
749                             ret);
750                 /* continue */
751         }
752 }
753
754 static int
755 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
756 {
757         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
758         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
759         int ret;
760
761         if (registers == NULL)
762                 return -EINVAL;
763
764         /* enable UIO interrupt handling */
765         ret = rte_intr_enable(&pci_dev->intr_handle);
766         if (ret < 0) {
767                 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
768                             ret);
769                 return ret;
770         }
771
772         /* inform the device that all interrupts are enabled */
773         AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
774                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
775
776         return 0;
777 }
778
779 static int
780 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
781 {
782         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
783         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
784         int ret;
785
786         if (registers == NULL)
787                 return 0;
788
789         /* inform the device that all interrupts are disabled */
790         AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
791                     RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
792
793         /* disable UIO interrupt handling */
794         ret = rte_intr_disable(&pci_dev->intr_handle);
795         if (ret < 0) {
796                 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
797                             ret);
798                 return ret;
799         }
800
801         return 0;
802 }
803
804 static int
805 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
806 {
807         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
808         int ret;
809
810         /* register a callback handler with UIO for interrupt notifications */
811         ret = rte_intr_callback_register(&pci_dev->intr_handle,
812                                          avp_dev_interrupt_handler,
813                                          (void *)eth_dev);
814         if (ret < 0) {
815                 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
816                             ret);
817                 return ret;
818         }
819
820         /* enable interrupt processing */
821         return avp_dev_enable_interrupts(eth_dev);
822 }
823
824 static int
825 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
826 {
827         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
828         void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
829         uint32_t value;
830
831         if (registers == NULL)
832                 return 0;
833
834         value = AVP_READ32(RTE_PTR_ADD(registers,
835                                        RTE_AVP_MIGRATION_STATUS_OFFSET));
836         if (value == RTE_AVP_MIGRATION_DETACHED) {
837                 /* migration is in progress; ack it if we have not already */
838                 AVP_WRITE32(value,
839                             RTE_PTR_ADD(registers,
840                                         RTE_AVP_MIGRATION_ACK_OFFSET));
841                 return 1;
842         }
843         return 0;
844 }
845
846 /*
847  * create an AVP device using the supplied device info by first translating it
848  * to guest address space(s).
849  */
850 static int
851 avp_dev_create(struct rte_pci_device *pci_dev,
852                struct rte_eth_dev *eth_dev)
853 {
854         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
855         struct rte_avp_device_info *host_info;
856         struct rte_mem_resource *resource;
857         unsigned int i;
858
859         resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
860         if (resource->addr == NULL) {
861                 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
862                             RTE_AVP_PCI_DEVICE_BAR);
863                 return -EFAULT;
864         }
865         host_info = (struct rte_avp_device_info *)resource->addr;
866
867         if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
868                 avp_dev_version_check(host_info->version)) {
869                 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
870                             host_info->magic, host_info->version,
871                             AVP_DPDK_DRIVER_VERSION);
872                 return -EINVAL;
873         }
874
875         PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
876                     RTE_AVP_GET_RELEASE_VERSION(host_info->version),
877                     RTE_AVP_GET_MAJOR_VERSION(host_info->version),
878                     RTE_AVP_GET_MINOR_VERSION(host_info->version));
879
880         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
881                     host_info->min_tx_queues, host_info->max_tx_queues);
882         PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
883                     host_info->min_rx_queues, host_info->max_rx_queues);
884         PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
885                     host_info->features);
886
887         if (avp->magic != AVP_ETHDEV_MAGIC) {
888                 /*
889                  * First time initialization (i.e., not during a VM
890                  * migration)
891                  */
892                 memset(avp, 0, sizeof(*avp));
893                 avp->magic = AVP_ETHDEV_MAGIC;
894                 avp->dev_data = eth_dev->data;
895                 avp->port_id = eth_dev->data->port_id;
896                 avp->host_mbuf_size = host_info->mbuf_size;
897                 avp->host_features = host_info->features;
898                 rte_spinlock_init(&avp->lock);
899                 memcpy(&avp->ethaddr.addr_bytes[0],
900                        host_info->ethaddr, ETHER_ADDR_LEN);
901                 /* adjust max values to not exceed our max */
902                 avp->max_tx_queues =
903                         RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
904                 avp->max_rx_queues =
905                         RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
906         } else {
907                 /* Re-attaching during migration */
908
909                 /* TODO... requires validation of host values */
910                 if ((host_info->features & avp->features) != avp->features) {
911                         PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
912                                     avp->features, host_info->features);
913                         /* this should not be possible; continue for now */
914                 }
915         }
916
917         /* the device id is allowed to change over migrations */
918         avp->device_id = host_info->device_id;
919
920         /* translate incoming host addresses to guest address space */
921         PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
922                     host_info->tx_phys);
923         PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
924                     host_info->alloc_phys);
925         for (i = 0; i < avp->max_tx_queues; i++) {
926                 avp->tx_q[i] = avp_dev_translate_address(eth_dev,
927                         host_info->tx_phys + (i * host_info->tx_size));
928
929                 avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
930                         host_info->alloc_phys + (i * host_info->alloc_size));
931         }
932
933         PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
934                     host_info->rx_phys);
935         PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
936                     host_info->free_phys);
937         for (i = 0; i < avp->max_rx_queues; i++) {
938                 avp->rx_q[i] = avp_dev_translate_address(eth_dev,
939                         host_info->rx_phys + (i * host_info->rx_size));
940                 avp->free_q[i] = avp_dev_translate_address(eth_dev,
941                         host_info->free_phys + (i * host_info->free_size));
942         }
943
944         PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
945                     host_info->req_phys);
946         PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
947                     host_info->resp_phys);
948         PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
949                     host_info->sync_phys);
950         PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
951                     host_info->mbuf_phys);
952         avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
953         avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
954         avp->sync_addr =
955                 avp_dev_translate_address(eth_dev, host_info->sync_phys);
956         avp->mbuf_addr =
957                 avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
958
959         /*
960          * store the host mbuf virtual address so that we can calculate
961          * relative offsets for each mbuf as they are processed
962          */
963         avp->host_mbuf_addr = host_info->mbuf_va;
964         avp->host_sync_addr = host_info->sync_va;
965
966         /*
967          * store the maximum packet length that is supported by the host.
968          */
969         avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
970         PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
971                                 host_info->max_rx_pkt_len);
972
973         return 0;
974 }
975
976 /*
977  * This function is based on the probe() function in avp_pci.c
978  * It returns 0 on success.
979  */
980 static int
981 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
982 {
983         struct avp_dev *avp =
984                 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
985         struct rte_pci_device *pci_dev;
986         int ret;
987
988         pci_dev = AVP_DEV_TO_PCI(eth_dev);
989         eth_dev->dev_ops = &avp_eth_dev_ops;
990         eth_dev->rx_pkt_burst = &avp_recv_pkts;
991         eth_dev->tx_pkt_burst = &avp_xmit_pkts;
992
993         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
994                 /*
995                  * no setup required on secondary processes.  All data is saved
996                  * in dev_private by the primary process. All resources should
997                  * be mapped to the same virtual address so all pointers should
998                  * be valid.
999                  */
1000                 if (eth_dev->data->scattered_rx) {
1001                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1002                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1003                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1004                 }
1005                 return 0;
1006         }
1007
1008         rte_eth_copy_pci_info(eth_dev, pci_dev);
1009
1010         eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1011
1012         /* Check current migration status */
1013         if (avp_dev_migration_pending(eth_dev)) {
1014                 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1015                 return -EBUSY;
1016         }
1017
1018         /* Check BAR resources */
1019         ret = avp_dev_check_regions(eth_dev);
1020         if (ret < 0) {
1021                 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1022                             ret);
1023                 return ret;
1024         }
1025
1026         /* Enable interrupts */
1027         ret = avp_dev_setup_interrupts(eth_dev);
1028         if (ret < 0) {
1029                 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1030                 return ret;
1031         }
1032
1033         /* Handle each subtype */
1034         ret = avp_dev_create(pci_dev, eth_dev);
1035         if (ret < 0) {
1036                 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1037                 return ret;
1038         }
1039
1040         /* Allocate memory for storing MAC addresses */
1041         eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1042         if (eth_dev->data->mac_addrs == NULL) {
1043                 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1044                             ETHER_ADDR_LEN);
1045                 return -ENOMEM;
1046         }
1047
1048         /* Get a mac from device config */
1049         ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1050
1051         return 0;
1052 }
1053
1054 static int
1055 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1056 {
1057         int ret;
1058
1059         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1060                 return -EPERM;
1061
1062         if (eth_dev->data == NULL)
1063                 return 0;
1064
1065         ret = avp_dev_disable_interrupts(eth_dev);
1066         if (ret != 0) {
1067                 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1068                 return ret;
1069         }
1070
1071         if (eth_dev->data->mac_addrs != NULL) {
1072                 rte_free(eth_dev->data->mac_addrs);
1073                 eth_dev->data->mac_addrs = NULL;
1074         }
1075
1076         return 0;
1077 }
1078
1079 static int
1080 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1081                   struct rte_pci_device *pci_dev)
1082 {
1083         struct rte_eth_dev *eth_dev;
1084         int ret;
1085
1086         eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1087                                            sizeof(struct avp_adapter));
1088         if (eth_dev == NULL)
1089                 return -ENOMEM;
1090
1091         ret = eth_avp_dev_init(eth_dev);
1092         if (ret)
1093                 rte_eth_dev_pci_release(eth_dev);
1094
1095         return ret;
1096 }
1097
1098 static int
1099 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1100 {
1101         return rte_eth_dev_pci_generic_remove(pci_dev,
1102                                               eth_avp_dev_uninit);
1103 }
1104
1105 static struct rte_pci_driver rte_avp_pmd = {
1106         .id_table = pci_id_avp_map,
1107         .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1108         .probe = eth_avp_pci_probe,
1109         .remove = eth_avp_pci_remove,
1110 };
1111
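/*
 * Returns non-zero when the configured maximum packet length cannot be held
 * in a single host or guest mbuf, in which case the scattered (chained mbuf)
 * receive and transmit handlers must be installed.
 */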
1112 static int
1113 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1114                          struct avp_dev *avp)
1115 {
1116         unsigned int max_rx_pkt_len;
1117
1118         max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1119
1120         if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1121             (max_rx_pkt_len > avp->host_mbuf_size)) {
1122                 /*
1123                  * If the guest MTU is greater than either the host or guest
1124                  * mbuf size then chained mbufs have to be enabled in the TX
1125                  * direction.  It is assumed that the application will not need
1126                  * to send packets larger than their max_rx_pkt_len (MRU).
1127                  */
1128                 return 1;
1129         }
1130
1131         if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1132             (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1133                 /*
1134                  * If the host MRU is greater than its own mbuf size or the
1135                  * guest mbuf size then chained mbufs have to be enabled in the
1136                  * RX direction.
1137                  */
1138                 return 1;
1139         }
1140
1141         return 0;
1142 }
1143
1144 static int
1145 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1146                        uint16_t rx_queue_id,
1147                        uint16_t nb_rx_desc,
1148                        unsigned int socket_id,
1149                        const struct rte_eth_rxconf *rx_conf,
1150                        struct rte_mempool *pool)
1151 {
1152         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1153         struct rte_pktmbuf_pool_private *mbp_priv;
1154         struct avp_queue *rxq;
1155
1156         if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1157                 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1158                             rx_queue_id, eth_dev->data->nb_rx_queues);
1159                 return -EINVAL;
1160         }
1161
1162         /* Save mbuf pool pointer */
1163         avp->pool = pool;
1164
1165         /* Save the local mbuf size */
1166         mbp_priv = rte_mempool_get_priv(pool);
1167         avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1168         avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1169
1170         if (avp_dev_enable_scattered(eth_dev, avp)) {
1171                 if (!eth_dev->data->scattered_rx) {
1172                         PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1173                         eth_dev->data->scattered_rx = 1;
1174                         eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1175                         eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1176                 }
1177         }
1178
1179         PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1180                     avp->max_rx_pkt_len,
1181                     eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1182                     avp->host_mbuf_size,
1183                     avp->guest_mbuf_size);
1184
1185         /* allocate a queue object */
1186         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1187                                  RTE_CACHE_LINE_SIZE, socket_id);
1188         if (rxq == NULL) {
1189                 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1190                 return -ENOMEM;
1191         }
1192
1193         /* save back pointers to AVP and Ethernet devices */
1194         rxq->avp = avp;
1195         rxq->dev_data = eth_dev->data;
1196         eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1197
1198         /* setup the queue receive mapping for the current queue. */
1199         _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1200
1201         PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1202
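        /* nb_rx_desc and rx_conf are not used by this PMD; the casts below
         * only silence unused-parameter warnings.
         */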
1203         (void)nb_rx_desc;
1204         (void)rx_conf;
1205         return 0;
1206 }
1207
1208 static int
1209 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1210                        uint16_t tx_queue_id,
1211                        uint16_t nb_tx_desc,
1212                        unsigned int socket_id,
1213                        const struct rte_eth_txconf *tx_conf)
1214 {
1215         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1216         struct avp_queue *txq;
1217
1218         if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1219                 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1220                             tx_queue_id, eth_dev->data->nb_tx_queues);
1221                 return -EINVAL;
1222         }
1223
1224         /* allocate a queue object */
1225         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1226                                  RTE_CACHE_LINE_SIZE, socket_id);
1227         if (txq == NULL) {
1228                 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1229                 return -ENOMEM;
1230         }
1231
1232         /* only the configured set of transmit queues is used */
1233         txq->queue_id = tx_queue_id;
1234         txq->queue_base = tx_queue_id;
1235         txq->queue_limit = tx_queue_id;
1236
1237         /* save back pointers to AVP and Ethernet devices */
1238         txq->avp = avp;
1239         txq->dev_data = eth_dev->data;
1240         eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1241
1242         PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1243
1244         (void)nb_tx_desc;
1245         (void)tx_conf;
1246         return 0;
1247 }
1248
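/*
 * Compare two Ethernet addresses as three 16-bit words; returns zero only
 * when all six bytes match.  The casts assume the addresses are at least
 * 2-byte aligned, which holds for addresses embedded in an ether_hdr.
 */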
1249 static inline int
1250 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1251 {
1252         uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1253         uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1254         return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1255 }
1256
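/*
 * Returns 0 if the received frame should be accepted (unicast to our MAC
 * address, broadcast, multicast, or any frame while in promiscuous mode)
 * and -1 if it should be dropped.
 */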
1257 static inline int
1258 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1259 {
1260         struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1261
1262         if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1263                 /* allow all packets destined to our address */
1264                 return 0;
1265         }
1266
1267         if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1268                 /* allow all broadcast packets */
1269                 return 0;
1270         }
1271
1272         if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1273                 /* allow all multicast packets */
1274                 return 0;
1275         }
1276
1277         if (avp->flags & AVP_F_PROMISC) {
1278                 /* allow all packets when in promiscuous mode */
1279                 return 0;
1280         }
1281
1282         return -1;
1283 }
1284
1285 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1286 static inline void
1287 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1288 {
1289         struct rte_avp_desc *first_buf;
1290         struct rte_avp_desc *pkt_buf;
1291         unsigned int pkt_len;
1292         unsigned int nb_segs;
1293         void *pkt_data;
1294         unsigned int i;
1295
1296         first_buf = avp_dev_translate_buffer(avp, buf);
1297
1298         i = 0;
1299         pkt_len = 0;
1300         nb_segs = first_buf->nb_segs;
1301         do {
1302                 /* Adjust pointers for guest addressing */
1303                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1304                 if (pkt_buf == NULL)
1305                         rte_panic("bad buffer: segment %u has an invalid address %p\n",
1306                                   i, buf);
1307                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1308                 if (pkt_data == NULL)
1309                         rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1310                                   i);
1311                 if (pkt_buf->data_len == 0)
1312                         rte_panic("bad buffer: segment %u has 0 data length\n",
1313                                   i);
1314                 pkt_len += pkt_buf->data_len;
1315                 nb_segs--;
1316                 i++;
1317
1318         } while (nb_segs && (buf = pkt_buf->next) != NULL);
1319
1320         if (nb_segs != 0)
1321                 rte_panic("bad buffer: expected %u segments found %u\n",
1322                           first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1323         if (pkt_len != first_buf->pkt_len)
1324                 rte_panic("bad buffer: expected length %u found %u\n",
1325                           first_buf->pkt_len, pkt_len);
1326 }
1327
1328 #define avp_dev_buffer_sanity_check(a, b) \
1329         __avp_dev_buffer_sanity_check((a), (b))
1330
1331 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1332
1333 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1334
1335 #endif
1336
1337 /*
1338  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1339  * there are exactly the required number of mbufs to copy all source bytes.
1340  */
1341 static inline struct rte_mbuf *
1342 avp_dev_copy_from_buffers(struct avp_dev *avp,
1343                           struct rte_avp_desc *buf,
1344                           struct rte_mbuf **mbufs,
1345                           unsigned int count)
1346 {
1347         struct rte_mbuf *m_previous = NULL;
1348         struct rte_avp_desc *pkt_buf;
1349         unsigned int total_length = 0;
1350         unsigned int copy_length;
1351         unsigned int src_offset;
1352         struct rte_mbuf *m;
1353         uint16_t ol_flags;
1354         uint16_t vlan_tci;
1355         void *pkt_data;
1356         unsigned int i;
1357
1358         avp_dev_buffer_sanity_check(avp, buf);
1359
1360         /* setup the first source buffer */
1361         pkt_buf = avp_dev_translate_buffer(avp, buf);
1362         pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1363         total_length = pkt_buf->pkt_len;
1364         src_offset = 0;
1365
1366         if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1367                 ol_flags = PKT_RX_VLAN_PKT;
1368                 vlan_tci = pkt_buf->vlan_tci;
1369         } else {
1370                 ol_flags = 0;
1371                 vlan_tci = 0;
1372         }
1373
1374         for (i = 0; (i < count) && (buf != NULL); i++) {
1375                 /* fill each destination buffer */
1376                 m = mbufs[i];
1377
1378                 if (m_previous != NULL)
1379                         m_previous->next = m;
1380
1381                 m_previous = m;
1382
1383                 do {
1384                         /*
1385                          * Copy as many source buffers as will fit in the
1386                          * destination buffer.
1387                          */
1388                         copy_length = RTE_MIN((avp->guest_mbuf_size -
1389                                                rte_pktmbuf_data_len(m)),
1390                                               (pkt_buf->data_len -
1391                                                src_offset));
1392                         rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1393                                                rte_pktmbuf_data_len(m)),
1394                                    RTE_PTR_ADD(pkt_data, src_offset),
1395                                    copy_length);
1396                         rte_pktmbuf_data_len(m) += copy_length;
1397                         src_offset += copy_length;
1398
1399                         if (likely(src_offset == pkt_buf->data_len)) {
1400                                 /* need a new source buffer */
1401                                 buf = pkt_buf->next;
1402                                 if (buf != NULL) {
1403                                         pkt_buf = avp_dev_translate_buffer(
1404                                                 avp, buf);
1405                                         pkt_data = avp_dev_translate_buffer(
1406                                                 avp, pkt_buf->data);
1407                                         src_offset = 0;
1408                                 }
1409                         }
1410
1411                         if (unlikely(rte_pktmbuf_data_len(m) ==
1412                                      avp->guest_mbuf_size)) {
1413                                 /* need a new destination mbuf */
1414                                 break;
1415                         }
1416
1417                 } while (buf != NULL);
1418         }
1419
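        /*
         * Only the first mbuf of the chain carries the packet-level fields
         * (total length, segment count, offload flags and VLAN tag); the
         * later segments were linked via m->next in the loop above.
         */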
1420         m = mbufs[0];
1421         m->ol_flags = ol_flags;
1422         m->nb_segs = count;
1423         rte_pktmbuf_pkt_len(m) = total_length;
1424         m->vlan_tci = vlan_tci;
1425
1426         __rte_mbuf_sanity_check(m, 1);
1427
1428         return m;
1429 }
1430
1431 static uint16_t
1432 avp_recv_scattered_pkts(void *rx_queue,
1433                         struct rte_mbuf **rx_pkts,
1434                         uint16_t nb_pkts)
1435 {
1436         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1437         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1438         struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1439         struct avp_dev *avp = rxq->avp;
1440         struct rte_avp_desc *pkt_buf;
1441         struct rte_avp_fifo *free_q;
1442         struct rte_avp_fifo *rx_q;
1443         struct rte_avp_desc *buf;
1444         unsigned int count, avail, n;
1445         unsigned int guest_mbuf_size;
1446         struct rte_mbuf *m;
1447         unsigned int required;
1448         unsigned int buf_len;
1449         unsigned int port_id;
1450         unsigned int i;
1451
1452         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1453                 /* VM live migration in progress */
1454                 return 0;
1455         }
1456
1457         guest_mbuf_size = avp->guest_mbuf_size;
1458         port_id = avp->port_id;
1459         rx_q = avp->rx_q[rxq->queue_id];
1460         free_q = avp->free_q[rxq->queue_id];
1461
1462         /* setup next queue to service */
1463         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1464                 (rxq->queue_id + 1) : rxq->queue_base;
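        /*
         * The queue identifier advances on every call and wraps from
         * queue_limit back to queue_base, so one application receive queue
         * services its assigned range of host queues in round-robin order.
         */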
1465
1466         /* determine how many slots are available in the free queue */
1467         count = avp_fifo_free_count(free_q);
1468
1469         /* determine how many packets are available in the rx queue */
1470         avail = avp_fifo_count(rx_q);
1471
1472         /* determine how many packets can be received */
1473         count = RTE_MIN(count, avail);
1474         count = RTE_MIN(count, nb_pkts);
1475         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1476
1477         if (unlikely(count == 0)) {
1478                 /* no free buffers, or no buffers on the rx queue */
1479                 return 0;
1480         }
1481
1482         /* retrieve pending packets */
1483         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1484         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1485                    count, rx_q);
1486
1487         count = 0;
1488         for (i = 0; i < n; i++) {
1489                 /* prefetch next entry while processing current one */
1490                 if (i + 1 < n) {
1491                         pkt_buf = avp_dev_translate_buffer(avp,
1492                                                            avp_bufs[i + 1]);
1493                         rte_prefetch0(pkt_buf);
1494                 }
1495                 buf = avp_bufs[i];
1496
1497                 /* Peek into the first buffer to determine the total length */
1498                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1499                 buf_len = pkt_buf->pkt_len;
1500
1501                 /* Allocate enough mbufs to receive the entire packet */
1502                 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
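                /*
                 * e.g. (illustrative) buf_len == 3000 with a guest_mbuf_size
                 * of 2048 yields required == 2 mbufs.
                 */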
1503                 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1504                         rxq->dev_data->rx_mbuf_alloc_failed++;
1505                         continue;
1506                 }
1507
1508                 /* Copy the data from the buffers to our mbufs */
1509                 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1510
1511                 /* finalize mbuf */
1512                 m->port = port_id;
1513
1514                 if (_avp_mac_filter(avp, m) != 0) {
1515                         /* silently discard packets not destined to our MAC */
1516                         rte_pktmbuf_free(m);
1517                         continue;
1518                 }
1519
1520                 /* return new mbuf to caller */
1521                 rx_pkts[count++] = m;
1522                 rxq->bytes += buf_len;
1523         }
1524
1525         rxq->packets += count;
1526
1527         /* return the buffers to the free queue */
1528         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
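        /*
         * All n host descriptors are returned, including those for packets
         * that were dropped above by the MAC filter or by an mbuf allocation
         * failure.
         */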
1529
1530         return count;
1531 }
1532
1533
1534 static uint16_t
1535 avp_recv_pkts(void *rx_queue,
1536               struct rte_mbuf **rx_pkts,
1537               uint16_t nb_pkts)
1538 {
1539         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1540         struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1541         struct avp_dev *avp = rxq->avp;
1542         struct rte_avp_desc *pkt_buf;
1543         struct rte_avp_fifo *free_q;
1544         struct rte_avp_fifo *rx_q;
1545         unsigned int count, avail, n;
1546         unsigned int pkt_len;
1547         struct rte_mbuf *m;
1548         char *pkt_data;
1549         unsigned int i;
1550
1551         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1552                 /* VM live migration in progress */
1553                 return 0;
1554         }
1555
1556         rx_q = avp->rx_q[rxq->queue_id];
1557         free_q = avp->free_q[rxq->queue_id];
1558
1559         /* setup next queue to service */
1560         rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1561                 (rxq->queue_id + 1) : rxq->queue_base;
1562
1563         /* determine how many slots are available in the free queue */
1564         count = avp_fifo_free_count(free_q);
1565
1566         /* determine how many packets are available in the rx queue */
1567         avail = avp_fifo_count(rx_q);
1568
1569         /* determine how many packets can be received */
1570         count = RTE_MIN(count, avail);
1571         count = RTE_MIN(count, nb_pkts);
1572         count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1573
1574         if (unlikely(count == 0)) {
1575                 /* no free buffers, or no buffers on the rx queue */
1576                 return 0;
1577         }
1578
1579         /* retrieve pending packets */
1580         n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1581         PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1582                    count, rx_q);
1583
1584         count = 0;
1585         for (i = 0; i < n; i++) {
1586                 /* prefetch next entry while processing current one */
1587                 if (i < n - 1) {
1588                         pkt_buf = avp_dev_translate_buffer(avp,
1589                                                            avp_bufs[i + 1]);
1590                         rte_prefetch0(pkt_buf);
1591                 }
1592
1593                 /* Adjust host pointers for guest addressing */
1594                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1595                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1596                 pkt_len = pkt_buf->pkt_len;
1597
1598                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1599                              (pkt_buf->nb_segs > 1))) {
1600                         /*
1601                          * application should be using the scattered receive
1602                          * function
1603                          */
1604                         rxq->errors++;
1605                         continue;
1606                 }
1607
1608                 /* allocate a new mbuf for this received packet */
1609                 m = rte_pktmbuf_alloc(avp->pool);
1610                 if (unlikely(m == NULL)) {
1611                         rxq->dev_data->rx_mbuf_alloc_failed++;
1612                         continue;
1613                 }
1614
1615                 /* copy data out of the host buffer to our buffer */
1616                 m->data_off = RTE_PKTMBUF_HEADROOM;
1617                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1618
1619                 /* initialize the local mbuf */
1620                 rte_pktmbuf_data_len(m) = pkt_len;
1621                 rte_pktmbuf_pkt_len(m) = pkt_len;
1622                 m->port = avp->port_id;
1623
1624                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1625                         m->ol_flags = PKT_RX_VLAN_PKT;
1626                         m->vlan_tci = pkt_buf->vlan_tci;
1627                 }
1628
1629                 if (_avp_mac_filter(avp, m) != 0) {
1630                         /* silently discard packets not destined to our MAC */
1631                         rte_pktmbuf_free(m);
1632                         continue;
1633                 }
1634
1635                 /* return new mbuf to caller */
1636                 rx_pkts[count++] = m;
1637                 rxq->bytes += pkt_len;
1638         }
1639
1640         rxq->packets += count;
1641
1642         /* return the buffers to the free queue */
1643         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1644
1645         return count;
1646 }
1647
1648 /*
1649  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1650  * there are sufficient destination buffers to contain the entire source
1651  * packet.
1652  */
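/*
 * The destination descriptors are chained through pkt_buf->next as they are
 * filled; only the first descriptor in the chain carries pkt_len, nb_segs
 * and the VLAN information.
 */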
1653 static inline uint16_t
1654 avp_dev_copy_to_buffers(struct avp_dev *avp,
1655                         struct rte_mbuf *mbuf,
1656                         struct rte_avp_desc **buffers,
1657                         unsigned int count)
1658 {
1659         struct rte_avp_desc *previous_buf = NULL;
1660         struct rte_avp_desc *first_buf = NULL;
1661         struct rte_avp_desc *pkt_buf;
1662         struct rte_avp_desc *buf;
1663         size_t total_length;
1664         struct rte_mbuf *m;
1665         size_t copy_length;
1666         size_t src_offset;
1667         char *pkt_data;
1668         unsigned int i;
1669
1670         __rte_mbuf_sanity_check(mbuf, 1);
1671
1672         m = mbuf;
1673         src_offset = 0;
1674         total_length = rte_pktmbuf_pkt_len(m);
1675         for (i = 0; (i < count) && (m != NULL); i++) {
1676                 /* fill each destination buffer */
1677                 buf = buffers[i];
1678
1679                 if (i < count - 1) {
1680                         /* prefetch next entry while processing this one */
1681                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1682                         rte_prefetch0(pkt_buf);
1683                 }
1684
1685                 /* Adjust pointers for guest addressing */
1686                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1687                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1688
1689                 /* setup the buffer chain */
1690                 if (previous_buf != NULL)
1691                         previous_buf->next = buf;
1692                 else
1693                         first_buf = pkt_buf;
1694
1695                 previous_buf = pkt_buf;
1696
1697                 do {
1698                         /*
1699                          * copy as many source mbuf segments as will fit in the
1700                          * destination buffer.
1701                          */
1702                         copy_length = RTE_MIN((avp->host_mbuf_size -
1703                                                pkt_buf->data_len),
1704                                               (rte_pktmbuf_data_len(m) -
1705                                                src_offset));
1706                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1707                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1708                                                src_offset),
1709                                    copy_length);
1710                         pkt_buf->data_len += copy_length;
1711                         src_offset += copy_length;
1712
1713                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1714                                 /* need a new source buffer */
1715                                 m = m->next;
1716                                 src_offset = 0;
1717                         }
1718
1719                         if (unlikely(pkt_buf->data_len ==
1720                                      avp->host_mbuf_size)) {
1721                                 /* need a new destination buffer */
1722                                 break;
1723                         }
1724
1725                 } while (m != NULL);
1726         }
1727
1728         first_buf->nb_segs = count;
1729         first_buf->pkt_len = total_length;
1730
1731         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1732                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1733                 first_buf->vlan_tci = mbuf->vlan_tci;
1734         }
1735
1736         avp_dev_buffer_sanity_check(avp, buffers[0]);
1737
1738         return total_length;
1739 }
1740
1741
1742 static uint16_t
1743 avp_xmit_scattered_pkts(void *tx_queue,
1744                         struct rte_mbuf **tx_pkts,
1745                         uint16_t nb_pkts)
1746 {
1747         struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1748                                        RTE_AVP_MAX_MBUF_SEGMENTS)];
1749         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1750         struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1751         struct avp_dev *avp = txq->avp;
1752         struct rte_avp_fifo *alloc_q;
1753         struct rte_avp_fifo *tx_q;
1754         unsigned int count, avail, n;
1755         unsigned int orig_nb_pkts;
1756         struct rte_mbuf *m;
1757         unsigned int required;
1758         unsigned int segments;
1759         unsigned int tx_bytes;
1760         unsigned int i;
1761
1762         orig_nb_pkts = nb_pkts;
1763         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1764                 /* VM live migration in progress */
1765                 /* TODO ... buffer for X packets then drop? */
1766                 txq->errors += nb_pkts;
1767                 return 0;
1768         }
1769
1770         tx_q = avp->tx_q[txq->queue_id];
1771         alloc_q = avp->alloc_q[txq->queue_id];
1772
1773         /* limit the number of transmitted packets to the max burst size */
1774         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1775                 nb_pkts = AVP_MAX_TX_BURST;
1776
1777         /* determine how many buffers are available to copy into */
1778         avail = avp_fifo_count(alloc_q);
1779         if (unlikely(avail > (AVP_MAX_TX_BURST *
1780                               RTE_AVP_MAX_MBUF_SEGMENTS)))
1781                 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
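        /*
         * Note: the clamp above keeps avail within the capacity of the local
         * avp_bufs[] descriptor array declared at the top of this function.
         */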
1782
1783         /* determine how many slots are available in the transmit queue */
1784         count = avp_fifo_free_count(tx_q);
1785
1786         /* determine how many packets can be sent */
1787         nb_pkts = RTE_MIN(count, nb_pkts);
1788
1789         /* determine how many packets will fit in the available buffers */
1790         count = 0;
1791         segments = 0;
1792         for (i = 0; i < nb_pkts; i++) {
1793                 m = tx_pkts[i];
1794                 if (likely(i < (unsigned int)nb_pkts - 1)) {
1795                         /* prefetch next entry while processing this one */
1796                         rte_prefetch0(tx_pkts[i + 1]);
1797                 }
1798                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1799                         avp->host_mbuf_size;
1800
1801                 if (unlikely((required == 0) ||
1802                              (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1803                         break;
1804                 else if (unlikely(required + segments > avail))
1805                         break;
1806                 segments += required;
1807                 count++;
1808         }
1809         nb_pkts = count;
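        /*
         * e.g. (illustrative) with host_mbuf_size == 2048 and avail == 4,
         * two 3000 byte packets need 2 + 2 == 4 host buffers and are both
         * accepted; a third packet would be left for the caller to resubmit
         * on a later call.
         */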
1810
1811         if (unlikely(nb_pkts == 0)) {
1812                 /* no available buffers, or no space on the tx queue */
1813                 txq->errors += orig_nb_pkts;
1814                 return 0;
1815         }
1816
1817         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1818                    nb_pkts, tx_q);
1819
1820         /* retrieve sufficient send buffers */
1821         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1822         if (unlikely(n != segments)) {
1823                 PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1824                            "n=%u, segments=%u, orig=%u\n",
1825                            n, segments, orig_nb_pkts);
1826                 txq->errors += orig_nb_pkts;
1827                 return 0;
1828         }
1829
1830         tx_bytes = 0;
1831         count = 0;
1832         for (i = 0; i < nb_pkts; i++) {
1833                 /* process each packet to be transmitted */
1834                 m = tx_pkts[i];
1835
1836                 /* determine how many buffers are required for this packet */
1837                 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1838                         avp->host_mbuf_size;
1839
1840                 tx_bytes += avp_dev_copy_to_buffers(avp, m,
1841                                                     &avp_bufs[count], required);
1842                 tx_bufs[i] = avp_bufs[count];
1843                 count += required;
1844
1845                 /* free the original mbuf */
1846                 rte_pktmbuf_free(m);
1847         }
1848
1849         txq->packets += nb_pkts;
1850         txq->bytes += tx_bytes;
1851
1852 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1853         for (i = 0; i < nb_pkts; i++)
1854                 avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1855 #endif
1856
1857         /* send the packets */
1858         n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1859         if (unlikely(n != orig_nb_pkts))
1860                 txq->errors += (orig_nb_pkts - n);
1861
1862         return n;
1863 }
1864
1865
1866 static uint16_t
1867 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1868 {
1869         struct avp_queue *txq = (struct avp_queue *)tx_queue;
1870         struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1871         struct avp_dev *avp = txq->avp;
1872         struct rte_avp_desc *pkt_buf;
1873         struct rte_avp_fifo *alloc_q;
1874         struct rte_avp_fifo *tx_q;
1875         unsigned int count, avail, n;
1876         struct rte_mbuf *m;
1877         unsigned int pkt_len;
1878         unsigned int tx_bytes;
1879         char *pkt_data;
1880         unsigned int i;
1881
1882         if (unlikely(avp->flags & AVP_F_DETACHED)) {
1883                 /* VM live migration in progress */
1884                 /* TODO ... buffer for X packets then drop?! */
1885                 txq->errors++;
1886                 return 0;
1887         }
1888
1889         tx_q = avp->tx_q[txq->queue_id];
1890         alloc_q = avp->alloc_q[txq->queue_id];
1891
1892         /* limit the number of transmitted packets to the max burst size */
1893         if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1894                 nb_pkts = AVP_MAX_TX_BURST;
1895
1896         /* determine how many buffers are available to copy into */
1897         avail = avp_fifo_count(alloc_q);
1898
1899         /* determine how many slots are available in the transmit queue */
1900         count = avp_fifo_free_count(tx_q);
1901
1902         /* determine how many packets can be sent */
1903         count = RTE_MIN(count, avail);
1904         count = RTE_MIN(count, nb_pkts);
1905
1906         if (unlikely(count == 0)) {
1907                 /* no available buffers, or no space on the tx queue */
1908                 txq->errors += nb_pkts;
1909                 return 0;
1910         }
1911
1912         PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1913                    count, tx_q);
1914
1915         /* retrieve sufficient send buffers */
1916         n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1917         if (unlikely(n != count)) {
1918                 txq->errors++;
1919                 return 0;
1920         }
1921
1922         tx_bytes = 0;
1923         for (i = 0; i < count; i++) {
1924                 /* prefetch next entry while processing the current one */
1925                 if (i < count - 1) {
1926                         pkt_buf = avp_dev_translate_buffer(avp,
1927                                                            avp_bufs[i + 1]);
1928                         rte_prefetch0(pkt_buf);
1929                 }
1930
1931                 /* process each packet to be transmitted */
1932                 m = tx_pkts[i];
1933
1934                 /* Adjust pointers for guest addressing */
1935                 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1936                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1937                 pkt_len = rte_pktmbuf_pkt_len(m);
1938
1939                 if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1940                                          (pkt_len > avp->host_mbuf_size))) {
1941                         /*
1942                          * application should be using the scattered transmit
1943                          * function; send it truncated to avoid the performance
1944                          * hit of having to manage returning the already
1945                          * allocated buffer to the free list.  This should not
1946                          * happen since the application should have set the
1947                          * max_rx_pkt_len based on its MTU and it should be
1948                          * policing its own packet sizes.
1949                          */
1950                         txq->errors++;
1951                         pkt_len = RTE_MIN(avp->guest_mbuf_size,
1952                                           avp->host_mbuf_size);
1953                 }
1954
1955                 /* copy data out of our mbuf and into the AVP buffer */
1956                 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1957                 pkt_buf->pkt_len = pkt_len;
1958                 pkt_buf->data_len = pkt_len;
1959                 pkt_buf->nb_segs = 1;
1960                 pkt_buf->next = NULL;
1961
1962                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
1963                         pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1964                         pkt_buf->vlan_tci = m->vlan_tci;
1965                 }
1966
1967                 tx_bytes += pkt_len;
1968
1969                 /* free the original mbuf */
1970                 rte_pktmbuf_free(m);
1971         }
1972
1973         txq->packets += count;
1974         txq->bytes += tx_bytes;
1975
1976         /* send the packets */
1977         n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1978
1979         return n;
1980 }
1981
1982 static void
1983 avp_dev_rx_queue_release(void *rx_queue)
1984 {
1985         struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1986         struct avp_dev *avp = rxq->avp;
1987         struct rte_eth_dev_data *data = avp->dev_data;
1988         unsigned int i;
1989
1990         for (i = 0; i < avp->num_rx_queues; i++) {
1991                 if (data->rx_queues[i] == rxq)
1992                         data->rx_queues[i] = NULL;
1993         }
1994 }
1995
1996 static void
1997 avp_dev_tx_queue_release(void *tx_queue)
1998 {
1999         struct avp_queue *txq = (struct avp_queue *)tx_queue;
2000         struct avp_dev *avp = txq->avp;
2001         struct rte_eth_dev_data *data = avp->dev_data;
2002         unsigned int i;
2003
2004         for (i = 0; i < avp->num_tx_queues; i++) {
2005                 if (data->tx_queues[i] == txq)
2006                         data->tx_queues[i] = NULL;
2007         }
2008 }
2009
2010 static int
2011 avp_dev_configure(struct rte_eth_dev *eth_dev)
2012 {
2013         struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
2014         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2015         struct rte_avp_device_info *host_info;
2016         struct rte_avp_device_config config;
2017         int mask = 0;
2018         void *addr;
2019         int ret;
2020
2021         rte_spinlock_lock(&avp->lock);
2022         if (avp->flags & AVP_F_DETACHED) {
2023                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2024                 ret = -ENOTSUP;
2025                 goto unlock;
2026         }
2027
2028         addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2029         host_info = (struct rte_avp_device_info *)addr;
2030
2031         /* Setup required number of queues */
2032         _avp_set_queue_counts(eth_dev);
2033
2034         mask = (ETH_VLAN_STRIP_MASK |
2035                 ETH_VLAN_FILTER_MASK |
2036                 ETH_VLAN_EXTEND_MASK);
2037         avp_vlan_offload_set(eth_dev, mask);
2038
2039         /* update device config */
2040         memset(&config, 0, sizeof(config));
2041         config.device_id = host_info->device_id;
2042         config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2043         config.driver_version = AVP_DPDK_DRIVER_VERSION;
2044         config.features = avp->features;
2045         config.num_tx_queues = avp->num_tx_queues;
2046         config.num_rx_queues = avp->num_rx_queues;
2047
2048         ret = avp_dev_ctrl_set_config(eth_dev, &config);
2049         if (ret < 0) {
2050                 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2051                             ret);
2052                 goto unlock;
2053         }
2054
2055         avp->flags |= AVP_F_CONFIGURED;
2056         ret = 0;
2057
2058 unlock:
2059         rte_spinlock_unlock(&avp->lock);
2060         return ret;
2061 }
2062
2063 static int
2064 avp_dev_start(struct rte_eth_dev *eth_dev)
2065 {
2066         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2067         int ret;
2068
2069         rte_spinlock_lock(&avp->lock);
2070         if (avp->flags & AVP_F_DETACHED) {
2071                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2072                 ret = -ENOTSUP;
2073                 goto unlock;
2074         }
2075
2076         /* disable features that we do not support */
2077         eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2078         eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2079         eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2080         eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2081
2082         /* update link state */
2083         ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2084         if (ret < 0) {
2085                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2086                             ret);
2087                 goto unlock;
2088         }
2089
2090         /* remember current link state */
2091         avp->flags |= AVP_F_LINKUP;
2092
2093         ret = 0;
2094
2095 unlock:
2096         rte_spinlock_unlock(&avp->lock);
2097         return ret;
2098 }
2099
2100 static void
2101 avp_dev_stop(struct rte_eth_dev *eth_dev)
2102 {
2103         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2104         int ret;
2105
2106         rte_spinlock_lock(&avp->lock);
2107         if (avp->flags & AVP_F_DETACHED) {
2108                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2109                 goto unlock;
2110         }
2111
2112         /* remember current link state */
2113         avp->flags &= ~AVP_F_LINKUP;
2114
2115         /* update link state */
2116         ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2117         if (ret < 0) {
2118                 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2119                             ret);
2120         }
2121
2122 unlock:
2123         rte_spinlock_unlock(&avp->lock);
2124 }
2125
2126 static void
2127 avp_dev_close(struct rte_eth_dev *eth_dev)
2128 {
2129         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2130         int ret;
2131
2132         rte_spinlock_lock(&avp->lock);
2133         if (avp->flags & AVP_F_DETACHED) {
2134                 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2135                 goto unlock;
2136         }
2137
2138         /* remember current link state */
2139         avp->flags &= ~AVP_F_LINKUP;
2140         avp->flags &= ~AVP_F_CONFIGURED;
2141
2142         ret = avp_dev_disable_interrupts(eth_dev);
2143         if (ret < 0) {
2144                 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2145                 /* continue */
2146         }
2147
2148         /* update device state */
2149         ret = avp_dev_ctrl_shutdown(eth_dev);
2150         if (ret < 0) {
2151                 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2152                             ret);
2153                 /* continue */
2154         }
2155
2156 unlock:
2157         rte_spinlock_unlock(&avp->lock);
2158 }
2159
2160 static int
2161 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2162                                         __rte_unused int wait_to_complete)
2163 {
2164         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2165         struct rte_eth_link *link = &eth_dev->data->dev_link;
2166
2167         link->link_speed = ETH_SPEED_NUM_10G;
2168         link->link_duplex = ETH_LINK_FULL_DUPLEX;
2169         link->link_status = !!(avp->flags & AVP_F_LINKUP);
2170
2171         return -1;
2172 }
2173
2174 static void
2175 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2176 {
2177         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2178
2179         rte_spinlock_lock(&avp->lock);
2180         if ((avp->flags & AVP_F_PROMISC) == 0) {
2181                 avp->flags |= AVP_F_PROMISC;
2182                 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2183                             eth_dev->data->port_id);
2184         }
2185         rte_spinlock_unlock(&avp->lock);
2186 }
2187
2188 static void
2189 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2190 {
2191         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2192
2193         rte_spinlock_lock(&avp->lock);
2194         if ((avp->flags & AVP_F_PROMISC) != 0) {
2195                 avp->flags &= ~AVP_F_PROMISC;
2196                 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2197                             eth_dev->data->port_id);
2198         }
2199         rte_spinlock_unlock(&avp->lock);
2200 }
2201
2202 static void
2203 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2204                  struct rte_eth_dev_info *dev_info)
2205 {
2206         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2207
2208         dev_info->driver_name = "rte_avp_pmd";
2209         dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2210         dev_info->max_rx_queues = avp->max_rx_queues;
2211         dev_info->max_tx_queues = avp->max_tx_queues;
2212         dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2213         dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2214         dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2215         if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2216                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2217                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2218         }
2219 }
2220
2221 static void
2222 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2223 {
2224         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2225
2226         if (mask & ETH_VLAN_STRIP_MASK) {
2227                 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2228                         if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2229                                 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2230                         else
2231                                 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2232                 } else {
2233                         PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2234                 }
2235         }
2236
2237         if (mask & ETH_VLAN_FILTER_MASK) {
2238                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2239                         PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2240         }
2241
2242         if (mask & ETH_VLAN_EXTEND_MASK) {
2243                 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2244                         PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2245         }
2246 }
2247
2248 static void
2249 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2250 {
2251         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2252         unsigned int i;
2253
2254         for (i = 0; i < avp->num_rx_queues; i++) {
2255                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2256
2257                 if (rxq) {
2258                         stats->ipackets += rxq->packets;
2259                         stats->ibytes += rxq->bytes;
2260                         stats->ierrors += rxq->errors;
2261
2262                         stats->q_ipackets[i] += rxq->packets;
2263                         stats->q_ibytes[i] += rxq->bytes;
2264                         stats->q_errors[i] += rxq->errors;
2265                 }
2266         }
2267
2268         for (i = 0; i < avp->num_tx_queues; i++) {
2269                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2270
2271                 if (txq) {
2272                         stats->opackets += txq->packets;
2273                         stats->obytes += txq->bytes;
2274                         stats->oerrors += txq->errors;
2275
2276                         stats->q_opackets[i] += txq->packets;
2277                         stats->q_obytes[i] += txq->bytes;
2278                         stats->q_errors[i] += txq->errors;
2279                 }
2280         }
2281 }
2282
2283 static void
2284 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2285 {
2286         struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2287         unsigned int i;
2288
2289         for (i = 0; i < avp->num_rx_queues; i++) {
2290                 struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2291
2292                 if (rxq) {
2293                         rxq->bytes = 0;
2294                         rxq->packets = 0;
2295                         rxq->errors = 0;
2296                 }
2297         }
2298
2299         for (i = 0; i < avp->num_tx_queues; i++) {
2300                 struct avp_queue *txq = avp->dev_data->tx_queues[i];
2301
2302                 if (txq) {
2303                         txq->bytes = 0;
2304                         txq->packets = 0;
2305                         txq->errors = 0;
2306                 }
2307         }
2308 }
2309
2310 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2311 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);