examples/tep_term: add tunnel filter type configuration
[dpdk.git] / examples / tep_termination / vxlan_setup.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <getopt.h>
35 #include <linux/if_ether.h>
36 #include <linux/if_vlan.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_ring.h>
39 #include <sys/param.h>
40 #include <unistd.h>
41
42 #include <rte_ethdev.h>
43 #include <rte_log.h>
44 #include <rte_string_fns.h>
45 #include <rte_mbuf.h>
46 #include <rte_malloc.h>
47 #include <rte_ip.h>
48 #include <rte_udp.h>
49 #include <rte_tcp.h>
50
51 #include "main.h"
52 #include "rte_virtio_net.h"
53 #include "vxlan.h"
54 #include "vxlan_setup.h"
55
/* Sizes of the outer encapsulation headers prepended by encapsulation(). */
#define IPV4_HEADER_LEN 20
#define UDP_HEADER_LEN  8
#define VXLAN_HEADER_LEN 8

#define IP_VERSION 0x40
#define IP_HDRLEN  0x05 /* default IP header length == five 32-bits words. */
#define IP_DEFTTL  64   /* from RFC 1340. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN) /* combined version/IHL byte */

/* "Don't fragment" flag, already in network byte order. */
#define IP_DN_FRAGMENT_FLAG 0x0040

/* Mask covering the low 48 bits; used to compare MAC addresses. */
#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Default inner VLAN ID used when the IMAC_IVLAN_TENID filter is selected */
#define INNER_VLAN_ID 100
76
/* VXLAN device: global tunnel configuration shared by RX/TX paths */
struct vxlan_conf vxdev;

/* Pre-built outer IPv4 and Ethernet headers, one per VXLAN port;
 * filled in by vxlan_link() and consumed by the encapsulation path. */
struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];

/* local VTEP IP address: multicast group addresses, one per overlay,
 * used as the outer source IP of encapsulated packets */
uint8_t vxlan_multicast_ips[2][4] = { {239, 1, 1, 1 }, {239, 1, 2, 1 } };

/* Remote VTEP IP address: outer destination IP, one per overlay */
uint8_t vxlan_overlay_ips[2][4] = { {192, 168, 10, 1}, {192, 168, 30, 1} };

/* Remote VTEP MAC address: outer destination MAC for all ports */
uint8_t peer_mac[6] = {0x00, 0x11, 0x01, 0x00, 0x00, 0x01};

/* VXLAN RX filter type table, indexed by the filter_idx program option */
uint8_t tep_filter_type[] = {RTE_TUNNEL_FILTER_IMAC_TENID,
			RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID,
			RTE_TUNNEL_FILTER_OMAC_TENID_IMAC,};
96
97 /* Options for configuring ethernet port */
/* Options for configuring ethernet port */
static const struct rte_eth_conf port_conf = {
	.rxmode = {
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
};
111
/**
 * The one or two device(s) that belongs to the same tenant ID can
 * be assigned in a VM.
 * Indexed by RX queue / device id; consecutive pairs share a tenant ID
 * (VNI), so two queues map onto one overlay network.
 */
const uint16_t tenant_id_conf[] = {
	1000, 1000, 1001, 1001, 1002, 1002, 1003, 1003,
	1004, 1004, 1005, 1005, 1006, 1006, 1007, 1007,
	1008, 1008, 1009, 1009, 1010, 1010, 1011, 1011,
	1012, 1012, 1013, 1013, 1014, 1014, 1015, 1015,
	1016, 1016, 1017, 1017, 1018, 1018, 1019, 1019,
	1020, 1020, 1021, 1021, 1022, 1022, 1023, 1023,
	1024, 1024, 1025, 1025, 1026, 1026, 1027, 1027,
	1028, 1028, 1029, 1029, 1030, 1030, 1031, 1031,
};
126
127 /**
128  * Initialises a given port using global settings and with the rx buffers
129  * coming from the mbuf_pool passed as parameter
130  */
131 int
132 vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool)
133 {
134         int retval;
135         uint16_t q;
136         struct rte_eth_dev_info dev_info;
137         uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count();
138         const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
139         const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
140         struct rte_eth_udp_tunnel tunnel_udp;
141         struct rte_eth_rxconf *rxconf;
142         struct rte_eth_txconf *txconf;
143         struct vxlan_conf *pconf = &vxdev;
144
145         pconf->dst_port = udp_port;
146
147         rte_eth_dev_info_get(port, &dev_info);
148
149         if (dev_info.max_rx_queues > MAX_QUEUES) {
150                 rte_exit(EXIT_FAILURE,
151                         "please define MAX_QUEUES no less than %u in %s\n",
152                         dev_info.max_rx_queues, __FILE__);
153         }
154
155         rxconf = &dev_info.default_rxconf;
156         txconf = &dev_info.default_txconf;
157         txconf->txq_flags = 0;
158
159         if (port >= rte_eth_dev_count())
160                 return -1;
161
162         rx_rings = nb_devices;
163
164         /* Configure ethernet device. */
165         retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
166         if (retval != 0)
167                 return retval;
168
169         /* Setup the queues. */
170         for (q = 0; q < rx_rings; q++) {
171                 retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
172                                                 rte_eth_dev_socket_id(port),
173                                                 rxconf,
174                                                 mbuf_pool);
175                 if (retval < 0)
176                         return retval;
177         }
178         for (q = 0; q < tx_rings; q++) {
179                 retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
180                                                 rte_eth_dev_socket_id(port),
181                                                 txconf);
182                 if (retval < 0)
183                         return retval;
184         }
185
186         /* Start the device. */
187         retval  = rte_eth_dev_start(port);
188         if (retval < 0)
189                 return retval;
190
191         /* Configure UDP port for UDP tunneling */
192         tunnel_udp.udp_port = udp_port;
193         tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN;
194         retval = rte_eth_dev_udp_tunnel_add(port, &tunnel_udp);
195         if (retval < 0)
196                 return retval;
197         rte_eth_macaddr_get(port, &ports_eth_addr[port]);
198         RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
199                         " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
200                         (unsigned)port,
201                         ports_eth_addr[port].addr_bytes[0],
202                         ports_eth_addr[port].addr_bytes[1],
203                         ports_eth_addr[port].addr_bytes[2],
204                         ports_eth_addr[port].addr_bytes[3],
205                         ports_eth_addr[port].addr_bytes[4],
206                         ports_eth_addr[port].addr_bytes[5]);
207
208         return 0;
209 }
210
/*
 * Strip the outer Ethernet/IP/UDP/VXLAN headers from one received packet.
 * Returns the result of decapsulation(); negative on failure.
 */
static int
vxlan_rx_process(struct rte_mbuf *pkt)
{
	int result = decapsulation(pkt);

	return result;
}
216
/*
 * Encapsulate one packet with the outer headers belonging to the given
 * TX queue. The redundant trailing "return;" of the original void
 * function has been dropped.
 *
 * @param queue_id  TX queue id, used to select the tunnel context.
 * @param pkt       Packet to encapsulate in place.
 */
static void
vxlan_tx_process(uint8_t queue_id, struct rte_mbuf *pkt)
{
	encapsulation(pkt, queue_id);
}
223
224 /*
225  * This function learns the MAC address of the device and set init
226  * L2 header and L3 header info.
227  */
228 int
229 vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
230 {
231         int i, ret;
232         struct ether_hdr *pkt_hdr;
233         struct virtio_net *dev = vdev->dev;
234         uint64_t portid = dev->device_fh;
235         struct ipv4_hdr *ip;
236
237         struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
238
239         if (unlikely(portid > VXLAN_N_PORTS)) {
240                 RTE_LOG(INFO, VHOST_DATA,
241                         "(%"PRIu64") WARNING: Not configuring device,"
242                         "as already have %d ports for VXLAN.",
243                         dev->device_fh, VXLAN_N_PORTS);
244                 return -1;
245         }
246
247         /* Learn MAC address of guest device from packet */
248         pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
249         if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) {
250                 RTE_LOG(INFO, VHOST_DATA,
251                         "(%"PRIu64") WARNING: This device is using an existing"
252                         " MAC address and has not been registered.\n",
253                         dev->device_fh);
254                 return -1;
255         }
256
257         for (i = 0; i < ETHER_ADDR_LEN; i++) {
258                 vdev->mac_address.addr_bytes[i] =
259                         vxdev.port[portid].vport_mac.addr_bytes[i] =
260                         pkt_hdr->s_addr.addr_bytes[i];
261                 vxdev.port[portid].peer_mac.addr_bytes[i] = peer_mac[i];
262         }
263
264         memset(&tunnel_filter_conf, 0,
265                 sizeof(struct rte_eth_tunnel_filter_conf));
266
267         tunnel_filter_conf.outer_mac = &ports_eth_addr[0];
268         tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
269
270         /* inner MAC */
271         tunnel_filter_conf.inner_mac = &vdev->mac_address;
272
273         tunnel_filter_conf.queue_id = vdev->rx_q;
274         tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
275
276         if (tep_filter_type[filter_idx] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
277                 tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;
278
279         tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
280
281         ret = rte_eth_dev_filter_ctrl(ports[0],
282                 RTE_ETH_FILTER_TUNNEL,
283                 RTE_ETH_FILTER_ADD,
284                 &tunnel_filter_conf);
285         if (ret) {
286                 RTE_LOG(ERR, VHOST_DATA,
287                         "%d Failed to add device MAC address to cloud filter\n",
288                 vdev->rx_q);
289                 return -1;
290         }
291
292         /* Print out inner MAC and VNI info. */
293         RTE_LOG(INFO, VHOST_DATA,
294                 "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n",
295                 vdev->rx_q,
296                 vdev->mac_address.addr_bytes[0],
297                 vdev->mac_address.addr_bytes[1],
298                 vdev->mac_address.addr_bytes[2],
299                 vdev->mac_address.addr_bytes[3],
300                 vdev->mac_address.addr_bytes[4],
301                 vdev->mac_address.addr_bytes[5],
302                 tenant_id_conf[vdev->rx_q]);
303
304         vxdev.port[portid].vport_id = portid;
305
306         for (i = 0; i < 4; i++) {
307                 /* Local VTEP IP */
308                 vxdev.port_ip |= vxlan_multicast_ips[portid][i] << (8 * i);
309                 /* Remote VTEP IP */
310                 vxdev.port[portid].peer_ip |=
311                         vxlan_overlay_ips[portid][i] << (8 * i);
312         }
313
314         vxdev.out_key = tenant_id_conf[vdev->rx_q];
315         ether_addr_copy(&vxdev.port[portid].peer_mac,
316                         &app_l2_hdr[portid].d_addr);
317         ether_addr_copy(&ports_eth_addr[0],
318                         &app_l2_hdr[portid].s_addr);
319         app_l2_hdr[portid].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
320
321         ip = &app_ip_hdr[portid];
322         ip->version_ihl = IP_VHL_DEF;
323         ip->type_of_service = 0;
324         ip->total_length = 0;
325         ip->packet_id = 0;
326         ip->fragment_offset = IP_DN_FRAGMENT_FLAG;
327         ip->time_to_live = IP_DEFTTL;
328         ip->next_proto_id = IPPROTO_UDP;
329         ip->hdr_checksum = 0;
330         ip->src_addr = vxdev.port_ip;
331         ip->dst_addr = vxdev.port[portid].peer_ip;
332
333         /* Set device as ready for RX. */
334         vdev->ready = DEVICE_RX;
335
336         return 0;
337 }
338
339 /**
340  * Removes cloud filter. Ensures that nothing is adding buffers to the RX
341  * queue before disabling RX on the device.
342  */
343 void
344 vxlan_unlink(struct vhost_dev *vdev)
345 {
346         unsigned i = 0, rx_count;
347         int ret;
348         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
349         struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
350
351         if (vdev->ready == DEVICE_RX) {
352                 memset(&tunnel_filter_conf, 0,
353                         sizeof(struct rte_eth_tunnel_filter_conf));
354
355                 tunnel_filter_conf.outer_mac = &ports_eth_addr[0];
356                 tunnel_filter_conf.inner_mac = &vdev->mac_address;
357                 tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
358                 tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
359
360                 if (tep_filter_type[filter_idx] ==
361                         RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
362                         tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;
363
364                 tunnel_filter_conf.queue_id = vdev->rx_q;
365                 tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
366
367                 ret = rte_eth_dev_filter_ctrl(ports[0],
368                                 RTE_ETH_FILTER_TUNNEL,
369                                 RTE_ETH_FILTER_DELETE,
370                                 &tunnel_filter_conf);
371                 if (ret) {
372                         RTE_LOG(ERR, VHOST_DATA,
373                                 "%d Failed to add device MAC address to cloud filter\n",
374                                 vdev->rx_q);
375                         return;
376                 }
377                 for (i = 0; i < ETHER_ADDR_LEN; i++)
378                         vdev->mac_address.addr_bytes[i] = 0;
379
380                 /* Clear out the receive buffers */
381                 rx_count = rte_eth_rx_burst(ports[0],
382                                 (uint16_t)vdev->rx_q,
383                                 pkts_burst, MAX_PKT_BURST);
384
385                 while (rx_count) {
386                         for (i = 0; i < rx_count; i++)
387                                 rte_pktmbuf_free(pkts_burst[i]);
388
389                         rx_count = rte_eth_rx_burst(ports[0],
390                                         (uint16_t)vdev->rx_q,
391                                         pkts_burst, MAX_PKT_BURST);
392                 }
393                 vdev->ready = DEVICE_MAC_LEARNING;
394         }
395 }
396
397 /* Transmit packets after encapsulating */
/*
 * Encapsulate a burst of packets and transmit them on the given port/queue.
 * Returns the number of packets actually sent by the NIC.
 */
int
vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts) {
	uint16_t idx;

	/* Prepend the outer VXLAN headers to every packet in the burst. */
	for (idx = 0; idx < nb_pkts; idx++)
		vxlan_tx_process(queue_id, tx_pkts[idx]);

	/* Hand the encapsulated burst to the NIC TX queue. */
	return rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts);
}
411
412 /* Check for decapsulation and pass packets directly to VIRTIO device */
413 int
414 vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
415                 uint32_t rx_count)
416 {
417         uint32_t i = 0;
418         uint32_t count = 0;
419         int ret;
420         struct rte_mbuf *pkts_valid[rx_count];
421
422         for (i = 0; i < rx_count; i++) {
423                 ret = vxlan_rx_process(pkts_burst[i]);
424                 if (unlikely(ret < 0))
425                         continue;
426
427                 pkts_valid[count] = pkts_burst[i];
428                         count++;
429         }
430
431         ret = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_valid, count);
432         return ret;
433 }