 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <linux/if_ether.h>
36 #include <linux/if_vlan.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_ring.h>
39 #include <sys/param.h>
42 #include <rte_ethdev.h>
44 #include <rte_string_fns.h>
46 #include <rte_malloc.h>
52 #include "rte_virtio_net.h"
54 #include "vxlan_setup.h"
/* Sizes (in bytes) of the outer headers prepended during VXLAN encapsulation. */
#define IPV4_HEADER_LEN 20
#define UDP_HEADER_LEN 8
#define VXLAN_HEADER_LEN 8

#define IP_VERSION 0x40		/* IPv4 in the high nibble of version_ihl */
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)

/* IPv4 "Don't Fragment" flag as stored in the 16-bit fragment_offset field.
 * NOTE(review): 0x0040 reads as 0x4000 (DF) on the wire only on a
 * little-endian host — confirm this sample targets LE platforms. */
#define IP_DN_FRAGMENT_FLAG 0x0040

/* Used to compare MAC addresses. */
#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512
/* Application-wide VXLAN configuration; filled in by vxlan_port_init()
 * and vxlan_link(). */
struct vxlan_conf vxdev;

/* Pre-built outer L3/L2 headers, one per VXLAN port, indexed by the
 * device's port id and initialized in vxlan_link(). */
struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];

/* local VTEP IP address */
uint8_t vxlan_multicast_ips[2][4] = { {239, 1, 1, 1 }, {239, 1, 2, 1 } };

/* Remote VTEP IP address */
uint8_t vxlan_overlay_ips[2][4] = { {192, 168, 10, 1}, {192, 168, 30, 1} };

/* Remote VTEP MAC address */
uint8_t peer_mac[6] = {0x00, 0x11, 0x01, 0x00, 0x00, 0x01};
/* Options for configuring ethernet port */
/* NOTE(review): the .rxmode = { ... } / .txmode = { ... } sub-initializer
 * lines appear to be elided from this excerpt — the fields below belong to
 * those sub-structs of struct rte_eth_conf; verify against the full file. */
static const struct rte_eth_conf port_conf = {
	.header_split = 0, /**< Header Split disabled */
	.hw_ip_checksum = 0, /**< IP checksum offload disabled */
	.hw_vlan_filter = 0, /**< VLAN filtering disabled */
	.jumbo_frame = 0, /**< Jumbo Frame Support disabled */
	.hw_strip_crc = 0, /**< CRC stripped by hardware */
	.mq_mode = ETH_MQ_TX_NONE,
/*
 * The one or two device(s) that belongs to the same tenant ID can
 * be assigned in a VM.
 * Indexed by the device's RX queue id; adjacent pairs share a tenant
 * (VNI), so two consecutive devices land in the same VXLAN segment.
 */
const uint16_t tenant_id_conf[] = {
	1000, 1000, 1001, 1001, 1002, 1002, 1003, 1003,
	1004, 1004, 1005, 1005, 1006, 1006, 1007, 1007,
	1008, 1008, 1009, 1009, 1010, 1010, 1011, 1011,
	1012, 1012, 1013, 1013, 1014, 1014, 1015, 1015,
	1016, 1016, 1017, 1017, 1018, 1018, 1019, 1019,
	1020, 1020, 1021, 1021, 1022, 1022, 1023, 1023,
	1024, 1024, 1025, 1025, 1026, 1026, 1027, 1027,
	1028, 1028, 1029, 1029, 1030, 1030, 1031, 1031,
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter.
 * Configures the device, sets up one RX queue per vhost device and one TX
 * queue per lcore, starts the port, registers the VXLAN UDP tunnel port,
 * and logs the port MAC address.
 */
vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool)
	struct rte_eth_dev_info dev_info;
	/* One TX queue per lcore; rx_rings is assigned from nb_devices below. */
	uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count();
	const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
	const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
	struct rte_eth_udp_tunnel tunnel_udp;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_txconf *txconf;
	struct vxlan_conf *pconf = &vxdev;

	/* Record the VXLAN UDP destination port in the global config.
	 * NOTE(review): udp_port is declared outside this excerpt. */
	pconf->dst_port = udp_port;

	rte_eth_dev_info_get(port, &dev_info);

	/* Fail hard if the compile-time queue limit is too small for this NIC. */
	if (dev_info.max_rx_queues > MAX_QUEUES) {
		rte_exit(EXIT_FAILURE,
			"please define MAX_QUEUES no less than %u in %s\n",
			dev_info.max_rx_queues, __FILE__);

	/* Start from the driver defaults; txq_flags = 0 keeps all TX offload
	 * paths enabled rather than the driver's restricted default. */
	rxconf = &dev_info.default_rxconf;
	txconf = &dev_info.default_txconf;
	txconf->txq_flags = 0;

	/* Reject port ids beyond the number of probed Ethernet devices. */
	if (port >= rte_eth_dev_count())

	/* One RX queue per vhost device. */
	rx_rings = nb_devices;

	/* Configure ethernet device. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);

	/* Setup the queues. */
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
				rte_eth_dev_socket_id(port),

	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
				rte_eth_dev_socket_id(port),

	/* Start the device. */
	retval = rte_eth_dev_start(port);

	/* Configure UDP port for UDP tunneling */
	tunnel_udp.udp_port = udp_port;
	tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN;
	retval = rte_eth_dev_udp_tunnel_add(port, &tunnel_udp);

	/* Log this port's MAC address for the operator. */
	rte_eth_macaddr_get(port, &ports_eth_addr[port]);
	RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			ports_eth_addr[port].addr_bytes[0],
			ports_eth_addr[port].addr_bytes[1],
			ports_eth_addr[port].addr_bytes[2],
			ports_eth_addr[port].addr_bytes[3],
			ports_eth_addr[port].addr_bytes[4],
			ports_eth_addr[port].addr_bytes[5]);
/* Strip the outer VXLAN encapsulation from a received packet.
 * Return value comes straight from decapsulation(); callers treat a
 * negative result as "not a packet for us" and drop it (see vxlan_rx_pkts). */
vxlan_rx_process(struct rte_mbuf *pkt)
	return decapsulation(pkt);
/* Prepend the outer Ethernet/IP/UDP/VXLAN headers for the given TX queue
 * before the packet is handed to the NIC (see encapsulation()). */
vxlan_tx_process(uint8_t queue_id, struct rte_mbuf *pkt)
	encapsulation(pkt, queue_id);
217 * This function learns the MAC address of the device and set init
218 * L2 header and L3 header info.
221 vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
224 struct ether_hdr *pkt_hdr;
225 struct virtio_net *dev = vdev->dev;
226 uint64_t portid = dev->device_fh;
229 if (unlikely(portid > VXLAN_N_PORTS)) {
230 RTE_LOG(INFO, VHOST_DATA,
231 "(%"PRIu64") WARNING: Not configuring device,"
232 "as already have %d ports for VXLAN.",
233 dev->device_fh, VXLAN_N_PORTS);
237 /* Learn MAC address of guest device from packet */
238 pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
239 if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) {
240 RTE_LOG(INFO, VHOST_DATA,
241 "(%"PRIu64") WARNING: This device is using an existing"
242 " MAC address and has not been registered.\n",
247 for (i = 0; i < ETHER_ADDR_LEN; i++) {
248 vdev->mac_address.addr_bytes[i] =
249 vxdev.port[portid].vport_mac.addr_bytes[i] =
250 pkt_hdr->s_addr.addr_bytes[i];
251 vxdev.port[portid].peer_mac.addr_bytes[i] = peer_mac[i];
254 /* Print out inner MAC and VNI info. */
255 RTE_LOG(INFO, VHOST_DATA,
256 "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n",
258 vdev->mac_address.addr_bytes[0],
259 vdev->mac_address.addr_bytes[1],
260 vdev->mac_address.addr_bytes[2],
261 vdev->mac_address.addr_bytes[3],
262 vdev->mac_address.addr_bytes[4],
263 vdev->mac_address.addr_bytes[5],
264 tenant_id_conf[vdev->rx_q]);
266 vxdev.port[portid].vport_id = portid;
268 for (i = 0; i < 4; i++) {
270 vxdev.port_ip |= vxlan_multicast_ips[portid][i] << (8 * i);
272 vxdev.port[portid].peer_ip |=
273 vxlan_overlay_ips[portid][i] << (8 * i);
276 vxdev.out_key = tenant_id_conf[vdev->rx_q];
277 ether_addr_copy(&vxdev.port[portid].peer_mac,
278 &app_l2_hdr[portid].d_addr);
279 ether_addr_copy(&ports_eth_addr[0],
280 &app_l2_hdr[portid].s_addr);
281 app_l2_hdr[portid].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
283 ip = &app_ip_hdr[portid];
284 ip->version_ihl = IP_VHL_DEF;
285 ip->type_of_service = 0;
286 ip->total_length = 0;
288 ip->fragment_offset = IP_DN_FRAGMENT_FLAG;
289 ip->time_to_live = IP_DEFTTL;
290 ip->next_proto_id = IPPROTO_UDP;
291 ip->hdr_checksum = 0;
292 ip->src_addr = vxdev.port_ip;
293 ip->dst_addr = vxdev.port[portid].peer_ip;
295 /* Set device as ready for RX. */
296 vdev->ready = DEVICE_RX;
/*
 * Removes cloud filter. Ensures that nothing is adding buffers to the RX
 * queue before disabling RX on the device.
 */
vxlan_unlink(struct vhost_dev *vdev)
	unsigned i = 0, rx_count;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];

	if (vdev->ready == DEVICE_RX) {
		/* Forget the learned MAC so the device can register again. */
		for (i = 0; i < ETHER_ADDR_LEN; i++)
			vdev->mac_address.addr_bytes[i] = 0;

		/* Clear out the receive buffers */
		rx_count = rte_eth_rx_burst(ports[0],
				(uint16_t)vdev->rx_q,
				pkts_burst, MAX_PKT_BURST);

		/* Free each drained packet, then poll again until the queue
		 * is empty.
		 * NOTE(review): the enclosing drain loop (while rx_count)
		 * appears to be elided from this excerpt — verify in the
		 * full file. */
		for (i = 0; i < rx_count; i++)
			rte_pktmbuf_free(pkts_burst[i]);

		rx_count = rte_eth_rx_burst(ports[0],
				(uint16_t)vdev->rx_q,
				pkts_burst, MAX_PKT_BURST);

		/* Device drops back to the MAC-learning state. */
		vdev->ready = DEVICE_MAC_LEARNING;
/* Transmit packets after encapsulating */
vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts) {
	/* Prepend the outer VXLAN headers in place, one packet at a time. */
	for (i = 0; i < nb_pkts; i++)
		vxlan_tx_process(queue_id, tx_pkts[i]);

	/* rte_eth_tx_burst returns how many packets were actually queued;
	 * that count is what this function reports to the caller. */
	ret = rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts);
/* Check for decapsulation and pass packets directly to VIRTIO device */
vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
	/* VLA sized by the incoming burst: holds only the packets that
	 * decapsulated successfully. */
	struct rte_mbuf *pkts_valid[rx_count];

	for (i = 0; i < rx_count; i++) {
		ret = vxlan_rx_process(pkts_burst[i]);
		/* Negative result: not a VXLAN packet for us — skip it
		 * (freeing presumably happens in the elided branch). */
		if (unlikely(ret < 0))

		pkts_valid[count] = pkts_burst[i];

	/* Hand the decapsulated packets to the guest's virtio RX queue. */
	ret = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_valid, count);