net/avp: add packet transmit functions
drivers/net/avp/avp_ethdev.c
/*
 *   BSD LICENSE
 *
 * Copyright (c) 2013-2017, Wind River Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1) Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2) Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3) Neither the name of Wind River Systems nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_byteorder.h>
#include <rte_dev.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_io.h>

#include "rte_avp_common.h"
#include "rte_avp_fifo.h"

#include "avp_logs.h"



static int avp_dev_configure(struct rte_eth_dev *dev);
static void avp_dev_info_get(struct rte_eth_dev *dev,
                             struct rte_eth_dev_info *dev_info);
static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev,
                               __rte_unused int wait_to_complete);
static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
                                  uint16_t rx_queue_id,
                                  uint16_t nb_rx_desc,
                                  unsigned int socket_id,
                                  const struct rte_eth_rxconf *rx_conf,
                                  struct rte_mempool *pool);

static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
                                  uint16_t tx_queue_id,
                                  uint16_t nb_tx_desc,
                                  unsigned int socket_id,
                                  const struct rte_eth_txconf *tx_conf);

static uint16_t avp_recv_scattered_pkts(void *rx_queue,
                                        struct rte_mbuf **rx_pkts,
                                        uint16_t nb_pkts);

static uint16_t avp_recv_pkts(void *rx_queue,
                              struct rte_mbuf **rx_pkts,
                              uint16_t nb_pkts);

static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
                                        struct rte_mbuf **tx_pkts,
                                        uint16_t nb_pkts);

static uint16_t avp_xmit_pkts(void *tx_queue,
                              struct rte_mbuf **tx_pkts,
                              uint16_t nb_pkts);

static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);


#define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)


#define AVP_MAX_RX_BURST 64
#define AVP_MAX_TX_BURST 64
#define AVP_MAX_MAC_ADDRS 1
#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN


/*
 * Defines the number of microseconds to wait before checking the response
 * queue for completion.
 */
#define AVP_REQUEST_DELAY_USECS (5000)

/*
 * Defines the number of times to check the response queue for completion
 * before declaring a timeout.
 */
#define AVP_MAX_REQUEST_RETRY (100)
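
/*
 * Together the two constants above bound the control plane handshake: a
 * request is polled for at most AVP_MAX_REQUEST_RETRY *
 * AVP_REQUEST_DELAY_USECS = 100 * 5000us = 500ms before
 * avp_dev_process_request() gives up with -ETIME.
 */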

/* Defines the current PCI driver version number */
#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION

/*
 * The set of PCI devices this driver supports
 */
static const struct rte_pci_id pci_id_avp_map[] = {
        { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
          .device_id = RTE_AVP_PCI_DEVICE_ID,
          .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
          .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
          .class_id = RTE_CLASS_ANY_ID,
        },

        { .vendor_id = 0, /* sentinel */
        },
};

/*
 * dev_ops for avp, bare necessities for basic operation
 */
static const struct eth_dev_ops avp_eth_dev_ops = {
        .dev_configure       = avp_dev_configure,
        .dev_infos_get       = avp_dev_info_get,
        .vlan_offload_set    = avp_vlan_offload_set,
        .link_update         = avp_dev_link_update,
        .rx_queue_setup      = avp_dev_rx_queue_setup,
        .rx_queue_release    = avp_dev_rx_queue_release,
        .tx_queue_setup      = avp_dev_tx_queue_setup,
        .tx_queue_release    = avp_dev_tx_queue_release,
};

/**@{ AVP device flags */
#define AVP_F_PROMISC (1 << 1)
#define AVP_F_CONFIGURED (1 << 2)
#define AVP_F_LINKUP (1 << 3)
/**@} */

/* Ethernet device validation marker */
#define AVP_ETHDEV_MAGIC 0x92972862

/*
 * Defines the AVP device attributes which are attached to an RTE ethernet
 * device
 */
struct avp_dev {
        uint32_t magic; /**< Memory validation marker */
        uint64_t device_id; /**< Unique system identifier */
        struct ether_addr ethaddr; /**< Host specified MAC address */
        struct rte_eth_dev_data *dev_data;
        /**< Back pointer to ethernet device data */
        volatile uint32_t flags; /**< Device operational flags */
        uint8_t port_id; /**< Ethernet port identifier */
        struct rte_mempool *pool; /**< pkt mbuf mempool */
        unsigned int guest_mbuf_size; /**< local pool mbuf size */
        unsigned int host_mbuf_size; /**< host mbuf size */
        unsigned int max_rx_pkt_len; /**< maximum receive unit */
        uint32_t host_features; /**< Supported feature bitmap */
        uint32_t features; /**< Enabled feature bitmap */
        unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
        unsigned int max_tx_queues; /**< Maximum number of transmit queues */
        unsigned int num_rx_queues; /**< Negotiated number of receive queues */
        unsigned int max_rx_queues; /**< Maximum number of receive queues */

        struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
        struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
        struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
        /**< Allocated mbufs queue */
        struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
        /**< To be freed mbufs queue */

        /* For request & response */
        struct rte_avp_fifo *req_q; /**< Request queue */
        struct rte_avp_fifo *resp_q; /**< Response queue */
        void *host_sync_addr; /**< (host) Req/Resp Mem address */
        void *sync_addr; /**< Req/Resp Mem address */
        void *host_mbuf_addr; /**< (host) MBUF pool start address */
        void *mbuf_addr; /**< MBUF pool start address */
} __rte_cache_aligned;

/* RTE ethernet private data */
struct avp_adapter {
        struct avp_dev avp;
} __rte_cache_aligned;


/* 32-bit MMIO register write */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))

/* 32-bit MMIO register read */
#define AVP_READ32(_addr) rte_read32_relaxed((_addr))

/* Macro to cast the ethernet device private data to an AVP object */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
        (&((struct avp_adapter *)adapter)->avp)

/*
 * Defines the structure of an AVP device queue for the purpose of handling the
 * receive and transmit burst callback functions
 */
struct avp_queue {
        struct rte_eth_dev_data *dev_data;
        /**< Backpointer to ethernet device data */
        struct avp_dev *avp; /**< Backpointer to AVP device */
        uint16_t queue_id;
        /**< Queue identifier used for indexing current queue */
        uint16_t queue_base;
        /**< Base queue identifier for queue servicing */
        uint16_t queue_limit;
        /**< Maximum queue identifier for queue servicing */

        uint64_t packets;
        uint64_t bytes;
        uint64_t errors;
};

/* send a request and wait for a response
 *
 * @warning must be called while holding the avp->lock spinlock.
 */
static int
avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
{
        unsigned int retry = AVP_MAX_REQUEST_RETRY;
        void *resp_addr = NULL;
        unsigned int count;
        int ret;

        PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);

        request->result = -ENOTSUP;

        /* Discard any stale responses before starting a new request */
        while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
                PMD_DRV_LOG(DEBUG, "Discarding stale response\n");

        rte_memcpy(avp->sync_addr, request, sizeof(*request));
        count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
        if (count < 1) {
                PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
                            request->req_id);
                ret = -EBUSY;
                goto done;
        }

        while (retry--) {
                /* wait for a response */
                usleep(AVP_REQUEST_DELAY_USECS);

                count = avp_fifo_count(avp->resp_q);
                if (count >= 1) {
                        /* response received */
                        break;
                }

                if ((count < 1) && (retry == 0)) {
                        PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
                                    request->req_id);
                        ret = -ETIME;
                        goto done;
                }
        }

        /* retrieve the response */
        count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
        if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
                PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
                            count, resp_addr, avp->host_sync_addr);
                ret = -ENODATA;
                goto done;
        }

        /* copy to user buffer */
        rte_memcpy(request, avp->sync_addr, sizeof(*request));
        ret = 0;

        PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
                    request->result, request->req_id);

done:
        return ret;
}

static int
avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
                        struct rte_avp_device_config *config)
{
        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct rte_avp_request request;
        int ret;

        /* setup a configure request */
        memset(&request, 0, sizeof(request));
        request.req_id = RTE_AVP_REQ_CFG_DEVICE;
        memcpy(&request.config, config, sizeof(request.config));

        ret = avp_dev_process_request(avp, &request);

        return ret == 0 ? request.result : ret;
}

/* translate from host mbuf virtual address to guest virtual address */
static inline void *
avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
{
        return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
                                       (uintptr_t)avp->host_mbuf_addr),
                           (uintptr_t)avp->mbuf_addr);
}
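
/*
 * Example of the arithmetic above (illustrative addresses only): with
 * host_mbuf_addr = 0x7f0000001000 and mbuf_addr = 0x400000001000, a host
 * buffer at 0x7f0000003000 translates to
 * 0x7f0000003000 - 0x7f0000001000 + 0x400000001000 = 0x400000003000.
 */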

/* translate from host physical address to guest virtual address */
static void *
avp_dev_translate_address(struct rte_eth_dev *eth_dev,
                          phys_addr_t host_phys_addr)
{
        struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
        struct rte_mem_resource *resource;
        struct rte_avp_memmap_info *info;
        struct rte_avp_memmap *map;
        off_t offset;
        void *addr;
        unsigned int i;

        addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
        resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
        info = (struct rte_avp_memmap_info *)resource->addr;

        offset = 0;
        for (i = 0; i < info->nb_maps; i++) {
                /* search all segments looking for a matching address */
                map = &info->maps[i];

                if ((host_phys_addr >= map->phys_addr) &&
                        (host_phys_addr < (map->phys_addr + map->length))) {
                        /* address is within this segment */
                        offset += (host_phys_addr - map->phys_addr);
                        addr = RTE_PTR_ADD(addr, offset);

                        PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
                                    host_phys_addr, addr);

                        return addr;
                }
                offset += map->length;
        }

        return NULL;
}

/* verify that the incoming device version is compatible with our version */
static int
avp_dev_version_check(uint32_t version)
{
        uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
        uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);

        if (device <= driver) {
                /* the host driver version is less than or equal to ours */
                return 0;
        }

        return 1;
}
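
/*
 * For example (illustrative version numbers): with the driver at v2.1.x, a
 * host device reporting v2.0.y or v2.1.y strips to 2.0/2.1 <= 2.1 and is
 * accepted (returns 0), while a host reporting v2.2.y strips to 2.2 > 2.1
 * and is rejected (returns 1); minor versions never affect compatibility.
 */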

/* verify that memory regions have expected version and validation markers */
static int
avp_dev_check_regions(struct rte_eth_dev *eth_dev)
{
        struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
        struct rte_avp_memmap_info *memmap;
        struct rte_avp_device_info *info;
        struct rte_mem_resource *resource;
        unsigned int i;

        /* Dump resource info for debug */
        for (i = 0; i < PCI_MAX_RESOURCE; i++) {
                resource = &pci_dev->mem_resource[i];
                if ((resource->phys_addr == 0) || (resource->len == 0))
                        continue;

                PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
                            i, resource->phys_addr,
                            resource->len, resource->addr);

                switch (i) {
                case RTE_AVP_PCI_MEMMAP_BAR:
                        memmap = (struct rte_avp_memmap_info *)resource->addr;
                        if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
                            (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
                                PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
                                            memmap->magic, memmap->version);
                                return -EINVAL;
                        }
                        break;

                case RTE_AVP_PCI_DEVICE_BAR:
                        info = (struct rte_avp_device_info *)resource->addr;
                        if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
                            avp_dev_version_check(info->version)) {
                                PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
                                            info->magic, info->version,
                                            AVP_DPDK_DRIVER_VERSION);
                                return -EINVAL;
                        }
                        break;

                case RTE_AVP_PCI_MEMORY_BAR:
                case RTE_AVP_PCI_MMIO_BAR:
                        if (resource->addr == NULL) {
                                PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
                                            i);
                                return -EINVAL;
                        }
                        break;

                case RTE_AVP_PCI_MSIX_BAR:
                default:
                        /* no validation required */
                        break;
                }
        }

        return 0;
}

static void
_avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
{
        struct avp_dev *avp =
                AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct avp_queue *rxq;
        uint16_t queue_count;
        uint16_t remainder;

        rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];

        /*
         * Must map all AVP fifos as evenly as possible between the configured
         * device queues.  Each device queue will service a subset of the AVP
         * fifos.  If the AVP fifos do not divide evenly among the device
         * queues, then the first device queues will each service one extra
         * fifo.
         */
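        /*
         * Worked example: 5 AVP fifos across 2 device queues gives
         * queue_count=2 and remainder=1, so queue 0 services fifos 0-2
         * (base=0, limit=2) and queue 1 services fifos 3-4 (base=3,
         * limit=4).
         */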
        queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
        remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
        if (rx_queue_id < remainder) {
                /* these queues must service one extra FIFO */
                rxq->queue_base = rx_queue_id * (queue_count + 1);
                rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
        } else {
                /* these queues service the regular number of FIFOs */
                rxq->queue_base = ((remainder * (queue_count + 1)) +
                                   ((rx_queue_id - remainder) * queue_count));
                rxq->queue_limit = rxq->queue_base + queue_count - 1;
        }

        PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
                    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);

        rxq->queue_id = rxq->queue_base;
}

static void
_avp_set_queue_counts(struct rte_eth_dev *eth_dev)
{
        struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct rte_avp_device_info *host_info;
        void *addr;

        addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
        host_info = (struct rte_avp_device_info *)addr;

        /*
         * the transmit direction is not negotiated beyond respecting the max
         * number of queues because the host can handle arbitrary guest tx
         * queues (host rx queues).
         */
        avp->num_tx_queues = eth_dev->data->nb_tx_queues;

        /*
         * the receive direction is more restrictive.  The host requires a
         * minimum number of guest rx queues (host tx queues) therefore
         * negotiate a value that is at least as large as the host minimum
         * requirement.  If the host and guest values are not identical then a
         * mapping will be established in the receive_queue_setup function.
         */
        avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
                                     eth_dev->data->nb_rx_queues);

        PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
                    avp->num_tx_queues, avp->num_rx_queues);
}

/*
 * create an AVP device using the supplied device info by first translating it
 * to guest address space(s).
 */
static int
avp_dev_create(struct rte_pci_device *pci_dev,
               struct rte_eth_dev *eth_dev)
{
        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct rte_avp_device_info *host_info;
        struct rte_mem_resource *resource;
        unsigned int i;

        resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
        if (resource->addr == NULL) {
                PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
                            RTE_AVP_PCI_DEVICE_BAR);
                return -EFAULT;
        }
        host_info = (struct rte_avp_device_info *)resource->addr;

        if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
                avp_dev_version_check(host_info->version)) {
                PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
                            host_info->magic, host_info->version,
                            AVP_DPDK_DRIVER_VERSION);
                return -EINVAL;
        }

        PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
                    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
                    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
                    RTE_AVP_GET_MINOR_VERSION(host_info->version));

        PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
                    host_info->min_tx_queues, host_info->max_tx_queues);
        PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
                    host_info->min_rx_queues, host_info->max_rx_queues);
        PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
                    host_info->features);

        if (avp->magic != AVP_ETHDEV_MAGIC) {
                /*
                 * First time initialization (i.e., not during a VM
                 * migration)
                 */
                memset(avp, 0, sizeof(*avp));
                avp->magic = AVP_ETHDEV_MAGIC;
                avp->dev_data = eth_dev->data;
                avp->port_id = eth_dev->data->port_id;
                avp->host_mbuf_size = host_info->mbuf_size;
                avp->host_features = host_info->features;
                memcpy(&avp->ethaddr.addr_bytes[0],
                       host_info->ethaddr, ETHER_ADDR_LEN);
                /* adjust max values to not exceed our max */
                avp->max_tx_queues =
                        RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
                avp->max_rx_queues =
                        RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
        } else {
                /* Re-attaching during migration */

                /* TODO... requires validation of host values */
                if ((host_info->features & avp->features) != avp->features) {
                        PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
                                    avp->features, host_info->features);
                        /* this should not be possible; continue for now */
                }
        }

        /* the device id is allowed to change over migrations */
        avp->device_id = host_info->device_id;

        /* translate incoming host addresses to guest address space */
        PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
                    host_info->tx_phys);
        PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
                    host_info->alloc_phys);
        for (i = 0; i < avp->max_tx_queues; i++) {
                avp->tx_q[i] = avp_dev_translate_address(eth_dev,
                        host_info->tx_phys + (i * host_info->tx_size));

                avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
                        host_info->alloc_phys + (i * host_info->alloc_size));
        }

        PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
                    host_info->rx_phys);
        PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
                    host_info->free_phys);
        for (i = 0; i < avp->max_rx_queues; i++) {
                avp->rx_q[i] = avp_dev_translate_address(eth_dev,
                        host_info->rx_phys + (i * host_info->rx_size));
                avp->free_q[i] = avp_dev_translate_address(eth_dev,
                        host_info->free_phys + (i * host_info->free_size));
        }

        PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
                    host_info->req_phys);
        PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
                    host_info->resp_phys);
        PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
                    host_info->sync_phys);
        PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
                    host_info->mbuf_phys);
        avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
        avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
        avp->sync_addr =
                avp_dev_translate_address(eth_dev, host_info->sync_phys);
        avp->mbuf_addr =
                avp_dev_translate_address(eth_dev, host_info->mbuf_phys);

        /*
         * store the host mbuf virtual address so that we can calculate
         * relative offsets for each mbuf as they are processed
         */
        avp->host_mbuf_addr = host_info->mbuf_va;
        avp->host_sync_addr = host_info->sync_va;

        /*
         * store the maximum packet length that is supported by the host.
         */
        avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
        PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
                                host_info->max_rx_pkt_len);

        return 0;
}

/*
 * This function is based on probe() function in avp_pci.c
 * It returns 0 on success.
 */
static int
eth_avp_dev_init(struct rte_eth_dev *eth_dev)
{
        struct avp_dev *avp =
                AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct rte_pci_device *pci_dev;
        int ret;

        pci_dev = AVP_DEV_TO_PCI(eth_dev);
        eth_dev->dev_ops = &avp_eth_dev_ops;
        eth_dev->rx_pkt_burst = &avp_recv_pkts;
        eth_dev->tx_pkt_burst = &avp_xmit_pkts;

        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                /*
                 * no setup required on secondary processes.  All data is saved
                 * in dev_private by the primary process.  All resources should
                 * be mapped to the same virtual address so all pointers should
                 * be valid.
                 */
                if (eth_dev->data->scattered_rx) {
                        PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
                        eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
                        eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
                }
                return 0;
        }

        rte_eth_copy_pci_info(eth_dev, pci_dev);

        eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;

        /* Check BAR resources */
        ret = avp_dev_check_regions(eth_dev);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
                            ret);
                return ret;
        }

        /* Handle each subtype */
        ret = avp_dev_create(pci_dev, eth_dev);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
                return ret;
        }

        /* Allocate memory for storing MAC addresses */
        eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
        if (eth_dev->data->mac_addrs == NULL) {
                PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
                            ETHER_ADDR_LEN);
                return -ENOMEM;
        }

        /* Get a mac from device config */
        ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);

        return 0;
}

static int
eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
{
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return -EPERM;

        if (eth_dev->data == NULL)
                return 0;

        if (eth_dev->data->mac_addrs != NULL) {
                rte_free(eth_dev->data->mac_addrs);
                eth_dev->data->mac_addrs = NULL;
        }

        return 0;
}


static struct eth_driver rte_avp_pmd = {
        {
                .id_table = pci_id_avp_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
                .probe = rte_eth_dev_pci_probe,
                .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_avp_dev_init,
        .eth_dev_uninit = eth_avp_dev_uninit,
        .dev_private_size = sizeof(struct avp_adapter),
};

static int
avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
                         struct avp_dev *avp)
{
        unsigned int max_rx_pkt_len;

        max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;

        if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
            (max_rx_pkt_len > avp->host_mbuf_size)) {
                /*
                 * If the guest MTU is greater than either the host or guest
                 * buffers then chained mbufs have to be enabled in the TX
                 * direction.  It is assumed that the application will not need
                 * to send packets larger than their max_rx_pkt_len (MRU).
                 */
                return 1;
        }

        if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
            (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
                /*
                 * If the host MRU is greater than its own mbuf size or the
                 * guest mbuf size then chained mbufs have to be enabled in the
                 * RX direction.
                 */
                return 1;
        }

        return 0;
}
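
/*
 * For example (illustrative sizes): with 2048 byte host and guest mbufs, a
 * 9000 byte max_rx_pkt_len on either side exceeds both buffer sizes and
 * selects the chained (scattered) burst functions, while a 1500 byte limit
 * keeps the simpler single-mbuf paths.
 */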

static int
avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
                       uint16_t rx_queue_id,
                       uint16_t nb_rx_desc,
                       unsigned int socket_id,
                       const struct rte_eth_rxconf *rx_conf,
                       struct rte_mempool *pool)
{
        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct rte_pktmbuf_pool_private *mbp_priv;
        struct avp_queue *rxq;

        if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
                PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
                            rx_queue_id, eth_dev->data->nb_rx_queues);
                return -EINVAL;
        }

        /* Save mbuf pool pointer */
        avp->pool = pool;

        /* Save the local mbuf size */
        mbp_priv = rte_mempool_get_priv(pool);
        avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
        avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;

        if (avp_dev_enable_scattered(eth_dev, avp)) {
                if (!eth_dev->data->scattered_rx) {
                        PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
                        eth_dev->data->scattered_rx = 1;
                        eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
                        eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
                }
        }

        PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
                    avp->max_rx_pkt_len,
                    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
                    avp->host_mbuf_size,
                    avp->guest_mbuf_size);

        /* allocate a queue object */
        rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq == NULL) {
                PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
                return -ENOMEM;
        }

        /* save back pointers to AVP and Ethernet devices */
        rxq->avp = avp;
        rxq->dev_data = eth_dev->data;
        eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;

        /* setup the queue receive mapping for the current queue. */
        _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);

        PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);

        (void)nb_rx_desc;
        (void)rx_conf;
        return 0;
}

static int
avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
                       uint16_t tx_queue_id,
                       uint16_t nb_tx_desc,
                       unsigned int socket_id,
                       const struct rte_eth_txconf *tx_conf)
{
        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct avp_queue *txq;

        if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
                PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
                            tx_queue_id, eth_dev->data->nb_tx_queues);
                return -EINVAL;
        }

        /* allocate a queue object */
        txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (txq == NULL) {
                PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
                return -ENOMEM;
        }

        /* only the configured set of transmit queues are used */
        txq->queue_id = tx_queue_id;
        txq->queue_base = tx_queue_id;
        txq->queue_limit = tx_queue_id;

        /* save back pointers to AVP and Ethernet devices */
        txq->avp = avp;
        txq->dev_data = eth_dev->data;
        eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;

        PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);

        (void)nb_tx_desc;
        (void)tx_conf;
        return 0;
}

static inline int
_avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
{
        uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
        uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
        return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
}
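
/*
 * The comparison above treats each 6-byte MAC address as three 16-bit words
 * and ORs together the XOR of each pair, so the result is zero if and only
 * if all six bytes match; a branch-free alternative to memcmp().
 */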

static inline int
_avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
{
        struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);

        if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
                /* allow all packets destined to our address */
                return 0;
        }

        if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
                /* allow all broadcast packets */
                return 0;
        }

        if (likely(is_multicast_ether_addr(&eth->d_addr))) {
                /* allow all multicast packets */
                return 0;
        }

        if (avp->flags & AVP_F_PROMISC) {
                /* allow all packets when in promiscuous mode */
                return 0;
        }

        return -1;
}

#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
        struct rte_avp_desc *first_buf;
        struct rte_avp_desc *pkt_buf;
        unsigned int pkt_len;
        unsigned int nb_segs;
        void *pkt_data;
        unsigned int i;

        first_buf = avp_dev_translate_buffer(avp, buf);

        i = 0;
        pkt_len = 0;
        nb_segs = first_buf->nb_segs;
        do {
                /* Adjust pointers for guest addressing */
                pkt_buf = avp_dev_translate_buffer(avp, buf);
                if (pkt_buf == NULL)
                        rte_panic("bad buffer: segment %u has an invalid address %p\n",
                                  i, buf);
                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
                if (pkt_data == NULL)
                        rte_panic("bad buffer: segment %u has a NULL data pointer\n",
                                  i);
                if (pkt_buf->data_len == 0)
                        rte_panic("bad buffer: segment %u has 0 data length\n",
                                  i);
                pkt_len += pkt_buf->data_len;
                nb_segs--;
                i++;

        } while (nb_segs && (buf = pkt_buf->next) != NULL);

        if (nb_segs != 0)
                rte_panic("bad buffer: expected %u segments found %u\n",
                          first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
        if (pkt_len != first_buf->pkt_len)
                rte_panic("bad buffer: expected length %u found %u\n",
                          first_buf->pkt_len, pkt_len);
}

#define avp_dev_buffer_sanity_check(a, b) \
        __avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif

/*
 * Copy a host buffer chain to a set of mbufs.  This function assumes that
 * exactly the required number of mbufs is available to copy all source bytes.
 */
static inline struct rte_mbuf *
avp_dev_copy_from_buffers(struct avp_dev *avp,
                          struct rte_avp_desc *buf,
                          struct rte_mbuf **mbufs,
                          unsigned int count)
{
        struct rte_mbuf *m_previous = NULL;
        struct rte_avp_desc *pkt_buf;
        unsigned int total_length = 0;
        unsigned int copy_length;
        unsigned int src_offset;
        struct rte_mbuf *m;
        uint16_t ol_flags;
        uint16_t vlan_tci;
        void *pkt_data;
        unsigned int i;

        avp_dev_buffer_sanity_check(avp, buf);

        /* setup the first source buffer */
        pkt_buf = avp_dev_translate_buffer(avp, buf);
        pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
        total_length = pkt_buf->pkt_len;
        src_offset = 0;

        if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
                ol_flags = PKT_RX_VLAN_PKT;
                vlan_tci = pkt_buf->vlan_tci;
        } else {
                ol_flags = 0;
                vlan_tci = 0;
        }

        for (i = 0; (i < count) && (buf != NULL); i++) {
                /* fill each destination buffer */
                m = mbufs[i];

                if (m_previous != NULL)
                        m_previous->next = m;

                m_previous = m;

                do {
                        /*
                         * Copy as many source buffers as will fit in the
                         * destination buffer.
                         */
                        copy_length = RTE_MIN((avp->guest_mbuf_size -
                                               rte_pktmbuf_data_len(m)),
                                              (pkt_buf->data_len -
                                               src_offset));
                        rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
                                               rte_pktmbuf_data_len(m)),
                                   RTE_PTR_ADD(pkt_data, src_offset),
                                   copy_length);
                        rte_pktmbuf_data_len(m) += copy_length;
                        src_offset += copy_length;

                        if (likely(src_offset == pkt_buf->data_len)) {
                                /* need a new source buffer */
                                buf = pkt_buf->next;
                                if (buf != NULL) {
                                        pkt_buf = avp_dev_translate_buffer(
                                                avp, buf);
                                        pkt_data = avp_dev_translate_buffer(
                                                avp, pkt_buf->data);
                                        src_offset = 0;
                                }
                        }

                        if (unlikely(rte_pktmbuf_data_len(m) ==
                                     avp->guest_mbuf_size)) {
                                /* need a new destination mbuf */
                                break;
                        }

                } while (buf != NULL);
        }

        m = mbufs[0];
        m->ol_flags = ol_flags;
        m->nb_segs = count;
        rte_pktmbuf_pkt_len(m) = total_length;
        m->vlan_tci = vlan_tci;

        __rte_mbuf_sanity_check(m, 1);

        return m;
}

static uint16_t
avp_recv_scattered_pkts(void *rx_queue,
                        struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts)
{
        struct avp_queue *rxq = (struct avp_queue *)rx_queue;
        struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
        struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
        struct avp_dev *avp = rxq->avp;
        struct rte_avp_desc *pkt_buf;
        struct rte_avp_fifo *free_q;
        struct rte_avp_fifo *rx_q;
        struct rte_avp_desc *buf;
        unsigned int count, avail, n;
        unsigned int guest_mbuf_size;
        struct rte_mbuf *m;
        unsigned int required;
        unsigned int buf_len;
        unsigned int port_id;
        unsigned int i;

        guest_mbuf_size = avp->guest_mbuf_size;
        port_id = avp->port_id;
        rx_q = avp->rx_q[rxq->queue_id];
        free_q = avp->free_q[rxq->queue_id];

        /* setup next queue to service */
        rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
                (rxq->queue_id + 1) : rxq->queue_base;

        /* determine how many slots are available in the free queue */
        count = avp_fifo_free_count(free_q);

        /* determine how many packets are available in the rx queue */
        avail = avp_fifo_count(rx_q);

        /* determine how many packets can be received */
        count = RTE_MIN(count, avail);
        count = RTE_MIN(count, nb_pkts);
        count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);

        if (unlikely(count == 0)) {
                /* no free buffers, or no buffers on the rx queue */
                return 0;
        }

        /* retrieve pending packets */
        n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
        PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
                   count, rx_q);

        count = 0;
        for (i = 0; i < n; i++) {
                /* prefetch next entry while processing current one */
                if (i + 1 < n) {
                        pkt_buf = avp_dev_translate_buffer(avp,
                                                           avp_bufs[i + 1]);
                        rte_prefetch0(pkt_buf);
                }
                buf = avp_bufs[i];

                /* Peek into the first buffer to determine the total length */
                pkt_buf = avp_dev_translate_buffer(avp, buf);
                buf_len = pkt_buf->pkt_len;

                /* Allocate enough mbufs to receive the entire packet */
                required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
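                /*
                 * (ceiling division: e.g., an illustrative 5000 byte packet
                 * with 2048 byte guest mbufs requires 3 mbufs)
                 */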
                if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
                        rxq->dev_data->rx_mbuf_alloc_failed++;
                        continue;
                }

                /* Copy the data from the buffers to our mbufs */
                m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);

                /* finalize mbuf */
                m->port = port_id;

                if (_avp_mac_filter(avp, m) != 0) {
                        /* silently discard packets not destined to our MAC */
                        rte_pktmbuf_free(m);
                        continue;
                }

                /* return new mbuf to caller */
                rx_pkts[count++] = m;
                rxq->bytes += buf_len;
        }

        rxq->packets += count;

        /* return the buffers to the free queue */
        avp_fifo_put(free_q, (void **)&avp_bufs[0], n);

        return count;
}


static uint16_t
avp_recv_pkts(void *rx_queue,
              struct rte_mbuf **rx_pkts,
              uint16_t nb_pkts)
{
        struct avp_queue *rxq = (struct avp_queue *)rx_queue;
        struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
        struct avp_dev *avp = rxq->avp;
        struct rte_avp_desc *pkt_buf;
        struct rte_avp_fifo *free_q;
        struct rte_avp_fifo *rx_q;
        unsigned int count, avail, n;
        unsigned int pkt_len;
        struct rte_mbuf *m;
        char *pkt_data;
        unsigned int i;

        rx_q = avp->rx_q[rxq->queue_id];
        free_q = avp->free_q[rxq->queue_id];

        /* setup next queue to service */
        rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
                (rxq->queue_id + 1) : rxq->queue_base;

        /* determine how many slots are available in the free queue */
        count = avp_fifo_free_count(free_q);

        /* determine how many packets are available in the rx queue */
        avail = avp_fifo_count(rx_q);

        /* determine how many packets can be received */
        count = RTE_MIN(count, avail);
        count = RTE_MIN(count, nb_pkts);
        count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);

        if (unlikely(count == 0)) {
                /* no free buffers, or no buffers on the rx queue */
                return 0;
        }

        /* retrieve pending packets */
        n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
        PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
                   count, rx_q);

        count = 0;
        for (i = 0; i < n; i++) {
                /* prefetch next entry while processing current one */
                if (i < n - 1) {
                        pkt_buf = avp_dev_translate_buffer(avp,
                                                           avp_bufs[i + 1]);
                        rte_prefetch0(pkt_buf);
                }

                /* Adjust host pointers for guest addressing */
                pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
                pkt_len = pkt_buf->pkt_len;

                if (unlikely((pkt_len > avp->guest_mbuf_size) ||
                             (pkt_buf->nb_segs > 1))) {
                        /*
                         * application should be using the scattered receive
                         * function
                         */
                        rxq->errors++;
                        continue;
                }

                /* allocate a new mbuf to hold the received packet */
1227                 m = rte_pktmbuf_alloc(avp->pool);
1228                 if (unlikely(m == NULL)) {
1229                         rxq->dev_data->rx_mbuf_alloc_failed++;
1230                         continue;
1231                 }
1232
1233                 /* copy data out of the host buffer to our buffer */
1234                 m->data_off = RTE_PKTMBUF_HEADROOM;
1235                 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1236
1237                 /* initialize the local mbuf */
1238                 rte_pktmbuf_data_len(m) = pkt_len;
1239                 rte_pktmbuf_pkt_len(m) = pkt_len;
1240                 m->port = avp->port_id;
1241
1242                 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1243                         m->ol_flags = PKT_RX_VLAN_PKT;
1244                         m->vlan_tci = pkt_buf->vlan_tci;
1245                 }
1246
1247                 if (_avp_mac_filter(avp, m) != 0) {
1248                         /* silently discard packets not destined to our MAC */
1249                         rte_pktmbuf_free(m);
1250                         continue;
1251                 }
1252
1253                 /* return new mbuf to caller */
1254                 rx_pkts[count++] = m;
1255                 rxq->bytes += pkt_len;
1256         }
1257
1258         rxq->packets += count;
1259
1260         /* return the buffers to the free queue */
1261         avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1262
1263         return count;
1264 }
1265
1266 /*
1267  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1268  * there are sufficient destination buffers to contain the entire source
1269  * packet.
1270  */
1271 static inline uint16_t
1272 avp_dev_copy_to_buffers(struct avp_dev *avp,
1273                         struct rte_mbuf *mbuf,
1274                         struct rte_avp_desc **buffers,
1275                         unsigned int count)
1276 {
1277         struct rte_avp_desc *previous_buf = NULL;
1278         struct rte_avp_desc *first_buf = NULL;
1279         struct rte_avp_desc *pkt_buf;
1280         struct rte_avp_desc *buf;
1281         size_t total_length;
1282         struct rte_mbuf *m;
1283         size_t copy_length;
1284         size_t src_offset;
1285         char *pkt_data;
1286         unsigned int i;
1287
1288         __rte_mbuf_sanity_check(mbuf, 1);
1289
1290         m = mbuf;
1291         src_offset = 0;
1292         total_length = rte_pktmbuf_pkt_len(m);
1293         for (i = 0; (i < count) && (m != NULL); i++) {
1294                 /* fill each destination buffer */
1295                 buf = buffers[i];
1296
1297                 if (i < count - 1) {
1298                         /* prefetch next entry while processing this one */
1299                         pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1300                         rte_prefetch0(pkt_buf);
1301                 }
1302
1303                 /* Adjust pointers for guest addressing */
1304                 pkt_buf = avp_dev_translate_buffer(avp, buf);
1305                 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1306
1307                 /* setup the buffer chain */
1308                 if (previous_buf != NULL)
1309                         previous_buf->next = buf;
1310                 else
1311                         first_buf = pkt_buf;
1312
1313                 previous_buf = pkt_buf;
1314
1315                 do {
1316                         /*
1317                          * copy as many source mbuf segments as will fit in the
1318                          * destination buffer.
1319                          */
1320                         copy_length = RTE_MIN((avp->host_mbuf_size -
1321                                                pkt_buf->data_len),
1322                                               (rte_pktmbuf_data_len(m) -
1323                                                src_offset));
1324                         rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1325                                    RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1326                                                src_offset),
1327                                    copy_length);
1328                         pkt_buf->data_len += copy_length;
1329                         src_offset += copy_length;
1330
1331                         if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1332                                 /* need a new source buffer */
1333                                 m = m->next;
1334                                 src_offset = 0;
1335                         }
1336
1337                         if (unlikely(pkt_buf->data_len ==
1338                                      avp->host_mbuf_size)) {
1339                                 /* need a new destination buffer */
1340                                 break;
1341                         }
1342
1343                 } while (m != NULL);
1344         }
1345
1346         first_buf->nb_segs = count;
1347         first_buf->pkt_len = total_length;
1348
1349         if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1350                 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1351                 first_buf->vlan_tci = mbuf->vlan_tci;
1352         }
1353
1354         avp_dev_buffer_sanity_check(avp, buffers[0]);
1355
1356         return total_length;
1357 }
1358
1359
static uint16_t
avp_xmit_scattered_pkts(void *tx_queue,
			struct rte_mbuf **tx_pkts,
			uint16_t nb_pkts)
{
	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
				       RTE_AVP_MAX_MBUF_SEGMENTS)];
	struct avp_queue *txq = (struct avp_queue *)tx_queue;
	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
	struct avp_dev *avp = txq->avp;
	struct rte_avp_fifo *alloc_q;
	struct rte_avp_fifo *tx_q;
	unsigned int count, avail, n;
	unsigned int orig_nb_pkts;
	struct rte_mbuf *m;
	unsigned int required;
	unsigned int segments;
	unsigned int tx_bytes;
	unsigned int i;

	orig_nb_pkts = nb_pkts;
	tx_q = avp->tx_q[txq->queue_id];
	alloc_q = avp->alloc_q[txq->queue_id];

	/* limit the number of transmitted packets to the max burst size */
	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
		nb_pkts = AVP_MAX_TX_BURST;

	/* determine how many buffers are available to copy into */
	avail = avp_fifo_count(alloc_q);
	if (unlikely(avail > (AVP_MAX_TX_BURST *
			      RTE_AVP_MAX_MBUF_SEGMENTS)))
		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
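	/*
	 * NOTE: clamping "avail" also bounds the total segment count
	 * computed below, so avp_fifo_get() can never overrun the
	 * avp_bufs[] array.
	 */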

	/* determine how many slots are available in the transmit queue */
	count = avp_fifo_free_count(tx_q);

	/* determine how many packets can be sent */
	nb_pkts = RTE_MIN(count, nb_pkts);

	/* determine how many packets will fit in the available buffers */
	count = 0;
	segments = 0;
	for (i = 0; i < nb_pkts; i++) {
		m = tx_pkts[i];
		if (likely(i < (unsigned int)nb_pkts - 1)) {
			/* prefetch next entry while processing this one */
			rte_prefetch0(tx_pkts[i + 1]);
		}
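		/*
		 * compute how many host buffers this packet needs, i.e.
		 * ceil(pkt_len / host_mbuf_size); e.g. (sizes illustrative
		 * only) a 5000 byte packet with 2048 byte host buffers
		 * needs (5000 + 2047) / 2048 = 3 buffers.
		 */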
		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
			avp->host_mbuf_size;

		if (unlikely((required == 0) ||
			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
			break;
		else if (unlikely(required + segments > avail))
			break;
		segments += required;
		count++;
	}
	nb_pkts = count;

	if (unlikely(nb_pkts == 0)) {
		/* no available buffers, or no space on the tx queue */
		txq->errors += orig_nb_pkts;
		return 0;
	}

	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
		   nb_pkts, tx_q);

	/* retrieve a sufficient number of send buffers */
	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
	if (unlikely(n != segments)) {
		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
			   "n=%u, segments=%u, orig=%u\n",
			   n, segments, orig_nb_pkts);
		txq->errors += orig_nb_pkts;
		return 0;
	}

	tx_bytes = 0;
	count = 0;
	for (i = 0; i < nb_pkts; i++) {
		/* process each packet to be transmitted */
		m = tx_pkts[i];

		/* determine how many buffers are required for this packet */
		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
			avp->host_mbuf_size;

		tx_bytes += avp_dev_copy_to_buffers(avp, m,
						    &avp_bufs[count], required);
		tx_bufs[i] = avp_bufs[count];
		count += required;

		/* free the original mbuf now that its data has been copied */
		rte_pktmbuf_free(m);
	}

	txq->packets += nb_pkts;
	txq->bytes += tx_bytes;

#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
	for (i = 0; i < nb_pkts; i++)
		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
#endif

	/*
	 * send the packets; any packets that could not be queued
	 * (including those dropped above by the burst limit) are counted
	 * as errors.
	 */
	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
	if (unlikely(n != orig_nb_pkts))
		txq->errors += (orig_nb_pkts - n);

	return n;
}

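/*
 * Illustrative usage (not part of this driver): a caller of the burst API
 * retries any packets the PMD could not queue.  "port_id", "queue_id",
 * "pkts" and "n" are assumed to be set up elsewhere.
 *
 *	uint16_t sent = 0;
 *	while (sent < n)
 *		sent += rte_eth_tx_burst(port_id, queue_id,
 *					 &pkts[sent], n - sent);
 */

/*
 * Transmit a burst of packets that must each fit within a single host
 * buffer.  Oversized packets are truncated (see below); applications with
 * larger packets should use the scattered transmit function instead.
 */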
static uint16_t
avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct avp_queue *txq = (struct avp_queue *)tx_queue;
	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
	struct avp_dev *avp = txq->avp;
	struct rte_avp_desc *pkt_buf;
	struct rte_avp_fifo *alloc_q;
	struct rte_avp_fifo *tx_q;
	unsigned int count, avail, n;
	struct rte_mbuf *m;
	unsigned int pkt_len;
	unsigned int tx_bytes;
	char *pkt_data;
	unsigned int i;

	tx_q = avp->tx_q[txq->queue_id];
	alloc_q = avp->alloc_q[txq->queue_id];

	/* limit the number of transmitted packets to the max burst size */
	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
		nb_pkts = AVP_MAX_TX_BURST;

	/* determine how many buffers are available to copy into */
	avail = avp_fifo_count(alloc_q);

	/* determine how many slots are available in the transmit queue */
	count = avp_fifo_free_count(tx_q);

	/* determine how many packets can be sent */
	count = RTE_MIN(count, avail);
	count = RTE_MIN(count, nb_pkts);

	if (unlikely(count == 0)) {
		/* no available buffers, or no space on the tx queue */
		txq->errors += nb_pkts;
		return 0;
	}

	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
		   count, tx_q);

	/* retrieve a sufficient number of send buffers */
	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
	if (unlikely(n != count)) {
		txq->errors++;
		return 0;
	}

	tx_bytes = 0;
	for (i = 0; i < count; i++) {
		/* prefetch next entry while processing the current one */
		if (i < count - 1) {
			pkt_buf = avp_dev_translate_buffer(avp,
							   avp_bufs[i + 1]);
			rte_prefetch0(pkt_buf);
		}

		/* process each packet to be transmitted */
		m = tx_pkts[i];

		/* adjust pointers for guest addressing */
		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
		pkt_len = rte_pktmbuf_pkt_len(m);

		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
			     (pkt_len > avp->host_mbuf_size))) {
			/*
			 * The application should be using the scattered
			 * transmit function.  Send the packet truncated to
			 * avoid the performance hit of returning the
			 * already allocated buffer to the free list.  This
			 * should not happen in practice because the
			 * application should set max_rx_pkt_len based on
			 * its MTU and police its own packet sizes.
			 */
			txq->errors++;
			pkt_len = RTE_MIN(avp->guest_mbuf_size,
					  avp->host_mbuf_size);
		}

		/* copy data out of our mbuf and into the AVP buffer */
		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
		pkt_buf->pkt_len = pkt_len;
		pkt_buf->data_len = pkt_len;
		pkt_buf->nb_segs = 1;
		pkt_buf->next = NULL;

		if (m->ol_flags & PKT_TX_VLAN_PKT) {
			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
			pkt_buf->vlan_tci = m->vlan_tci;
		}

		tx_bytes += pkt_len;

		/* free the original mbuf now that its data has been copied */
		rte_pktmbuf_free(m);
	}

	txq->packets += count;
	txq->bytes += tx_bytes;

	/* send the packets */
	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);

	return n;
}

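/*
 * Queue release clears every dev_data slot that still references the
 * queue being released so that no stale pointers remain.
 */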
static void
avp_dev_rx_queue_release(void *rx_queue)
{
	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
	struct avp_dev *avp = rxq->avp;
	struct rte_eth_dev_data *data = avp->dev_data;
	unsigned int i;

	for (i = 0; i < avp->num_rx_queues; i++) {
		if (data->rx_queues[i] == rxq)
			data->rx_queues[i] = NULL;
	}
}

static void
avp_dev_tx_queue_release(void *tx_queue)
{
	struct avp_queue *txq = (struct avp_queue *)tx_queue;
	struct avp_dev *avp = txq->avp;
	struct rte_eth_dev_data *data = avp->dev_data;
	unsigned int i;

	for (i = 0; i < avp->num_tx_queues; i++) {
		if (data->tx_queues[i] == txq)
			data->tx_queues[i] = NULL;
	}
}

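/*
 * Negotiate the device configuration with the host: read the host device
 * info from the PCI BAR, apply the VLAN offload mask, then send the
 * selected queue counts and feature bits to the host in a control
 * request.
 */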
static int
avp_dev_configure(struct rte_eth_dev *eth_dev)
{
	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
	struct rte_avp_device_info *host_info;
	struct rte_avp_device_config config;
	int mask = 0;
	void *addr;
	int ret;

	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
	host_info = (struct rte_avp_device_info *)addr;

	/* set up the required number of queues */
	_avp_set_queue_counts(eth_dev);

	mask = (ETH_VLAN_STRIP_MASK |
		ETH_VLAN_FILTER_MASK |
		ETH_VLAN_EXTEND_MASK);
	avp_vlan_offload_set(eth_dev, mask);

	/* build the device config to send to the host */
	memset(&config, 0, sizeof(config));
	config.device_id = host_info->device_id;
	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
	config.driver_version = AVP_DPDK_DRIVER_VERSION;
	config.features = avp->features;
	config.num_tx_queues = avp->num_tx_queues;
	config.num_rx_queues = avp->num_rx_queues;

	ret = avp_dev_ctrl_set_config(eth_dev, &config);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
			    ret);
		goto done;
	}

	avp->flags |= AVP_F_CONFIGURED;
	ret = 0;

done:
	return ret;
}

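/*
 * AVP is a virtual device, so the link is emulated as a fixed 10G
 * full-duplex link; only the up/down status (AVP_F_LINKUP) is dynamic.
 */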
static int
avp_dev_link_update(struct rte_eth_dev *eth_dev,
		    __rte_unused int wait_to_complete)
{
	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
	struct rte_eth_link *link = &eth_dev->data->dev_link;

	link->link_speed = ETH_SPEED_NUM_10G;
	link->link_duplex = ETH_LINK_FULL_DUPLEX;
	link->link_status = !!(avp->flags & AVP_F_LINKUP);

	return -1;
}

static void
avp_dev_info_get(struct rte_eth_dev *eth_dev,
		 struct rte_eth_dev_info *dev_info)
{
	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);

	dev_info->driver_name = "rte_avp_pmd";
	dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
	dev_info->max_rx_queues = avp->max_rx_queues;
	dev_info->max_tx_queues = avp->max_tx_queues;
	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
	}
}

static void
avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
{
	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);

	if (mask & ETH_VLAN_STRIP_MASK) {
		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
			if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
			else
				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
		} else if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) {
			/* only complain if stripping was actually requested */
			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
		}
	}

	if (mask & ETH_VLAN_FILTER_MASK) {
		if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
	}

	if (mask & ETH_VLAN_EXTEND_MASK) {
		if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
	}
}
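
/*
 * Illustrative usage (not part of this driver): an application enables
 * receive VLAN stripping before configuring the port; "port_id" is
 * assumed.  At configure time the PMD translates this setting into the
 * RTE_AVP_FEATURE_VLAN_OFFLOAD feature bit sent to the host.
 *
 *	struct rte_eth_conf conf;
 *
 *	memset(&conf, 0, sizeof(conf));
 *	conf.rxmode.hw_vlan_strip = 1;
 *	rte_eth_dev_configure(port_id, 1, 1, &conf);
 */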

RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv);
RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);