examples/tep_term: add TSO offload configuration
[dpdk.git] / examples / tep_termination / vxlan_setup.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <getopt.h>
35 #include <linux/if_ether.h>
36 #include <linux/if_vlan.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_ring.h>
39 #include <sys/param.h>
40 #include <unistd.h>
41
42 #include <rte_ethdev.h>
43 #include <rte_log.h>
44 #include <rte_string_fns.h>
45 #include <rte_mbuf.h>
46 #include <rte_malloc.h>
47 #include <rte_ip.h>
48 #include <rte_udp.h>
49 #include <rte_tcp.h>
50
51 #include "main.h"
52 #include "rte_virtio_net.h"
53 #include "vxlan.h"
54 #include "vxlan_setup.h"
55
56 #define IPV4_HEADER_LEN 20
57 #define UDP_HEADER_LEN  8
58 #define VXLAN_HEADER_LEN 8
59
60 #define IP_VERSION 0x40
61 #define IP_HDRLEN  0x05 /* default IP header length == five 32-bits words. */
62 #define IP_DEFTTL  64   /* from RFC 1340. */
63 #define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)
64
65 #define IP_DN_FRAGMENT_FLAG 0x0040
66
67 /* Used to compare MAC addresses. */
68 #define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
69
70 /* Configurable number of RX/TX ring descriptors */
71 #define RTE_TEST_RX_DESC_DEFAULT 1024
72 #define RTE_TEST_TX_DESC_DEFAULT 512
73
74 /* Default inner VLAN ID */
75 #define INNER_VLAN_ID 100
76
77 /* VXLAN device */
78 struct vxlan_conf vxdev;
79
80 struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
81 struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
82
83 /* local VTEP IP address */
84 uint8_t vxlan_multicast_ips[2][4] = { {239, 1, 1, 1 }, {239, 1, 2, 1 } };
85
86 /* Remote VTEP IP address */
87 uint8_t vxlan_overlay_ips[2][4] = { {192, 168, 10, 1}, {192, 168, 30, 1} };
88
89 /* Remote VTEP MAC address */
90 uint8_t peer_mac[6] = {0x00, 0x11, 0x01, 0x00, 0x00, 0x01};
91
92 /* VXLAN RX filter type */
93 uint8_t tep_filter_type[] = {RTE_TUNNEL_FILTER_IMAC_TENID,
94                         RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID,
95                         RTE_TUNNEL_FILTER_OMAC_TENID_IMAC,};
96
97 /* Options for configuring ethernet port */
98 static const struct rte_eth_conf port_conf = {
99         .rxmode = {
100                 .split_hdr_size = 0,
101                 .header_split   = 0, /**< Header Split disabled */
102                 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
103                 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
104                 .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
105                 .hw_strip_crc   = 0, /**< CRC stripped by hardware */
106         },
107         .txmode = {
108                 .mq_mode = ETH_MQ_TX_NONE,
109         },
110 };
111
112 /**
113  * The one or two device(s) that belongs to the same tenant ID can
114  * be assigned in a VM.
115  */
116 const uint16_t tenant_id_conf[] = {
117         1000, 1000, 1001, 1001, 1002, 1002, 1003, 1003,
118         1004, 1004, 1005, 1005, 1006, 1006, 1007, 1007,
119         1008, 1008, 1009, 1009, 1010, 1010, 1011, 1011,
120         1012, 1012, 1013, 1013, 1014, 1014, 1015, 1015,
121         1016, 1016, 1017, 1017, 1018, 1018, 1019, 1019,
122         1020, 1020, 1021, 1021, 1022, 1022, 1023, 1023,
123         1024, 1024, 1025, 1025, 1026, 1026, 1027, 1027,
124         1028, 1028, 1029, 1029, 1030, 1030, 1031, 1031,
125 };
126
127 /**
128  * Initialises a given port using global settings and with the rx buffers
129  * coming from the mbuf_pool passed as parameter
130  */
131 int
132 vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool)
133 {
134         int retval;
135         uint16_t q;
136         struct rte_eth_dev_info dev_info;
137         uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count();
138         const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
139         const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
140         struct rte_eth_udp_tunnel tunnel_udp;
141         struct rte_eth_rxconf *rxconf;
142         struct rte_eth_txconf *txconf;
143         struct vxlan_conf *pconf = &vxdev;
144
145         pconf->dst_port = udp_port;
146
147         rte_eth_dev_info_get(port, &dev_info);
148
149         if (dev_info.max_rx_queues > MAX_QUEUES) {
150                 rte_exit(EXIT_FAILURE,
151                         "please define MAX_QUEUES no less than %u in %s\n",
152                         dev_info.max_rx_queues, __FILE__);
153         }
154
155         rxconf = &dev_info.default_rxconf;
156         txconf = &dev_info.default_txconf;
157         txconf->txq_flags = 0;
158
159         if (port >= rte_eth_dev_count())
160                 return -1;
161
162         rx_rings = nb_devices;
163
164         /* Configure ethernet device. */
165         retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
166         if (retval != 0)
167                 return retval;
168
169         /* Setup the queues. */
170         for (q = 0; q < rx_rings; q++) {
171                 retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
172                                                 rte_eth_dev_socket_id(port),
173                                                 rxconf,
174                                                 mbuf_pool);
175                 if (retval < 0)
176                         return retval;
177         }
178         for (q = 0; q < tx_rings; q++) {
179                 retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
180                                                 rte_eth_dev_socket_id(port),
181                                                 txconf);
182                 if (retval < 0)
183                         return retval;
184         }
185
186         /* Start the device. */
187         retval  = rte_eth_dev_start(port);
188         if (retval < 0)
189                 return retval;
190
191         /* Configure UDP port for UDP tunneling */
192         tunnel_udp.udp_port = udp_port;
193         tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN;
194         retval = rte_eth_dev_udp_tunnel_add(port, &tunnel_udp);
195         if (retval < 0)
196                 return retval;
197         rte_eth_macaddr_get(port, &ports_eth_addr[port]);
198         RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
199                         " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
200                         (unsigned)port,
201                         ports_eth_addr[port].addr_bytes[0],
202                         ports_eth_addr[port].addr_bytes[1],
203                         ports_eth_addr[port].addr_bytes[2],
204                         ports_eth_addr[port].addr_bytes[3],
205                         ports_eth_addr[port].addr_bytes[4],
206                         ports_eth_addr[port].addr_bytes[5]);
207
208         if (tso_segsz != 0) {
209                 struct rte_eth_dev_info dev_info;
210                 rte_eth_dev_info_get(port, &dev_info);
211                 if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0)
212                         RTE_LOG(WARNING, PORT,
213                                 "hardware TSO offload is not supported\n");
214         }
215         return 0;
216 }
217
/* Thin wrapper around decapsulation(); returns its status (callers treat
 * a negative value as "drop/skip this packet").
 */
static int
vxlan_rx_process(struct rte_mbuf *pkt)
{
	int status = decapsulation(pkt);

	return status;
}
223
/* Thin wrapper: encapsulate one packet for transmission on queue_id. */
static void
vxlan_tx_process(uint8_t queue_id, struct rte_mbuf *pkt)
{
	encapsulation(pkt, queue_id);
}
230
231 /*
232  * This function learns the MAC address of the device and set init
233  * L2 header and L3 header info.
234  */
235 int
236 vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
237 {
238         int i, ret;
239         struct ether_hdr *pkt_hdr;
240         struct virtio_net *dev = vdev->dev;
241         uint64_t portid = dev->device_fh;
242         struct ipv4_hdr *ip;
243
244         struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
245
246         if (unlikely(portid > VXLAN_N_PORTS)) {
247                 RTE_LOG(INFO, VHOST_DATA,
248                         "(%"PRIu64") WARNING: Not configuring device,"
249                         "as already have %d ports for VXLAN.",
250                         dev->device_fh, VXLAN_N_PORTS);
251                 return -1;
252         }
253
254         /* Learn MAC address of guest device from packet */
255         pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
256         if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) {
257                 RTE_LOG(INFO, VHOST_DATA,
258                         "(%"PRIu64") WARNING: This device is using an existing"
259                         " MAC address and has not been registered.\n",
260                         dev->device_fh);
261                 return -1;
262         }
263
264         for (i = 0; i < ETHER_ADDR_LEN; i++) {
265                 vdev->mac_address.addr_bytes[i] =
266                         vxdev.port[portid].vport_mac.addr_bytes[i] =
267                         pkt_hdr->s_addr.addr_bytes[i];
268                 vxdev.port[portid].peer_mac.addr_bytes[i] = peer_mac[i];
269         }
270
271         memset(&tunnel_filter_conf, 0,
272                 sizeof(struct rte_eth_tunnel_filter_conf));
273
274         tunnel_filter_conf.outer_mac = &ports_eth_addr[0];
275         tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
276
277         /* inner MAC */
278         tunnel_filter_conf.inner_mac = &vdev->mac_address;
279
280         tunnel_filter_conf.queue_id = vdev->rx_q;
281         tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
282
283         if (tep_filter_type[filter_idx] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
284                 tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;
285
286         tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
287
288         ret = rte_eth_dev_filter_ctrl(ports[0],
289                 RTE_ETH_FILTER_TUNNEL,
290                 RTE_ETH_FILTER_ADD,
291                 &tunnel_filter_conf);
292         if (ret) {
293                 RTE_LOG(ERR, VHOST_DATA,
294                         "%d Failed to add device MAC address to cloud filter\n",
295                 vdev->rx_q);
296                 return -1;
297         }
298
299         /* Print out inner MAC and VNI info. */
300         RTE_LOG(INFO, VHOST_DATA,
301                 "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n",
302                 vdev->rx_q,
303                 vdev->mac_address.addr_bytes[0],
304                 vdev->mac_address.addr_bytes[1],
305                 vdev->mac_address.addr_bytes[2],
306                 vdev->mac_address.addr_bytes[3],
307                 vdev->mac_address.addr_bytes[4],
308                 vdev->mac_address.addr_bytes[5],
309                 tenant_id_conf[vdev->rx_q]);
310
311         vxdev.port[portid].vport_id = portid;
312
313         for (i = 0; i < 4; i++) {
314                 /* Local VTEP IP */
315                 vxdev.port_ip |= vxlan_multicast_ips[portid][i] << (8 * i);
316                 /* Remote VTEP IP */
317                 vxdev.port[portid].peer_ip |=
318                         vxlan_overlay_ips[portid][i] << (8 * i);
319         }
320
321         vxdev.out_key = tenant_id_conf[vdev->rx_q];
322         ether_addr_copy(&vxdev.port[portid].peer_mac,
323                         &app_l2_hdr[portid].d_addr);
324         ether_addr_copy(&ports_eth_addr[0],
325                         &app_l2_hdr[portid].s_addr);
326         app_l2_hdr[portid].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
327
328         ip = &app_ip_hdr[portid];
329         ip->version_ihl = IP_VHL_DEF;
330         ip->type_of_service = 0;
331         ip->total_length = 0;
332         ip->packet_id = 0;
333         ip->fragment_offset = IP_DN_FRAGMENT_FLAG;
334         ip->time_to_live = IP_DEFTTL;
335         ip->next_proto_id = IPPROTO_UDP;
336         ip->hdr_checksum = 0;
337         ip->src_addr = vxdev.port_ip;
338         ip->dst_addr = vxdev.port[portid].peer_ip;
339
340         /* Set device as ready for RX. */
341         vdev->ready = DEVICE_RX;
342
343         return 0;
344 }
345
346 /**
347  * Removes cloud filter. Ensures that nothing is adding buffers to the RX
348  * queue before disabling RX on the device.
349  */
350 void
351 vxlan_unlink(struct vhost_dev *vdev)
352 {
353         unsigned i = 0, rx_count;
354         int ret;
355         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
356         struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
357
358         if (vdev->ready == DEVICE_RX) {
359                 memset(&tunnel_filter_conf, 0,
360                         sizeof(struct rte_eth_tunnel_filter_conf));
361
362                 tunnel_filter_conf.outer_mac = &ports_eth_addr[0];
363                 tunnel_filter_conf.inner_mac = &vdev->mac_address;
364                 tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
365                 tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
366
367                 if (tep_filter_type[filter_idx] ==
368                         RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
369                         tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;
370
371                 tunnel_filter_conf.queue_id = vdev->rx_q;
372                 tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
373
374                 ret = rte_eth_dev_filter_ctrl(ports[0],
375                                 RTE_ETH_FILTER_TUNNEL,
376                                 RTE_ETH_FILTER_DELETE,
377                                 &tunnel_filter_conf);
378                 if (ret) {
379                         RTE_LOG(ERR, VHOST_DATA,
380                                 "%d Failed to add device MAC address to cloud filter\n",
381                                 vdev->rx_q);
382                         return;
383                 }
384                 for (i = 0; i < ETHER_ADDR_LEN; i++)
385                         vdev->mac_address.addr_bytes[i] = 0;
386
387                 /* Clear out the receive buffers */
388                 rx_count = rte_eth_rx_burst(ports[0],
389                                 (uint16_t)vdev->rx_q,
390                                 pkts_burst, MAX_PKT_BURST);
391
392                 while (rx_count) {
393                         for (i = 0; i < rx_count; i++)
394                                 rte_pktmbuf_free(pkts_burst[i]);
395
396                         rx_count = rte_eth_rx_burst(ports[0],
397                                         (uint16_t)vdev->rx_q,
398                                         pkts_burst, MAX_PKT_BURST);
399                 }
400                 vdev->ready = DEVICE_MAC_LEARNING;
401         }
402 }
403
/* Encapsulate every packet in the burst, then hand the whole burst to
 * the NIC TX queue. Returns the number of packets actually transmitted.
 */
int
vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts) {
	uint16_t idx;

	for (idx = 0; idx < nb_pkts; idx++)
		vxlan_tx_process(queue_id, tx_pkts[idx]);

	return rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts);
}
418
419 /* Check for decapsulation and pass packets directly to VIRTIO device */
420 int
421 vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
422                 uint32_t rx_count)
423 {
424         uint32_t i = 0;
425         uint32_t count = 0;
426         int ret;
427         struct rte_mbuf *pkts_valid[rx_count];
428
429         for (i = 0; i < rx_count; i++) {
430                 ret = vxlan_rx_process(pkts_burst[i]);
431                 if (unlikely(ret < 0))
432                         continue;
433
434                 pkts_valid[count] = pkts_burst[i];
435                         count++;
436         }
437
438         ret = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_valid, count);
439         return ret;
440 }