5778b25a9b23e7e746e79651ba92abb02f35806e
[dpdk.git] / lib / librte_pmd_bond / rte_eth_bond_alb.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "rte_eth_bond_private.h"
35 #include "rte_eth_bond_alb.h"
36
/*
 * Fold a byte range into a single byte by XOR-ing all of its bytes together.
 *
 * hash_start: first byte of the range to hash.
 * hash_size:  number of bytes to fold; a value <= 0 yields 0.
 *
 * Returns the XOR of the hash_size bytes starting at hash_start.
 */
static inline uint8_t
simple_hash(uint8_t *hash_start, int hash_size)
{
        uint8_t folded = 0;
        int remaining;

        for (remaining = hash_size; remaining > 0; --remaining)
                folded ^= *hash_start++;

        return folded;
}
49
50 static uint8_t
51 calculate_slave(struct bond_dev_private *internals)
52 {
53         uint8_t idx;
54
55         idx = (internals->mode6.last_slave + 1) % internals->active_slave_count;
56         internals->mode6.last_slave = idx;
57         return internals->active_slaves[idx];
58 }
59
60 int
61 bond_mode_alb_enable(struct rte_eth_dev *bond_dev)
62 {
63         struct bond_dev_private *internals = bond_dev->data->dev_private;
64         struct client_data *hash_table = internals->mode6.client_table;
65
66         uint16_t element_size;
67         char mem_name[RTE_ETH_NAME_MAX_LEN];
68         int socket_id = bond_dev->pci_dev->numa_node;
69
70         /* Fill hash table with initial values */
71         memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE);
72         rte_spinlock_init(&internals->mode6.lock);
73         internals->mode6.last_slave = ALB_NULL_INDEX;
74         internals->mode6.ntt = 0;
75
76         /* Initialize memory pool for ARP packets to send */
77         if (internals->mode6.mempool == NULL) {
78                 /*
79                  * 256 is size of ETH header, ARP header and nested VLAN headers.
80                  * The value is chosen to be cache aligned.
81                  */
82                 element_size = 256 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
83                 snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name);
84                 internals->mode6.mempool = rte_mempool_create(mem_name,
85                                 512 * RTE_MAX_ETHPORTS,
86                                 element_size,
87                                 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
88                                                 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
89                                 sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
90                                 NULL, rte_pktmbuf_init, NULL, socket_id, 0);
91
92                 if (internals->mode6.mempool == NULL) {
93                         RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n",
94                                         bond_dev->data->name);
95                         rte_panic(
96                                         "Failed to allocate memory pool ('%s')\n"
97                                         "for bond device '%s'\n",
98                                         mem_name, bond_dev->data->name);
99                 }
100         }
101
102         return 0;
103 }
104
105 void bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
106                 struct bond_dev_private *internals) {
107         struct arp_hdr *arp;
108
109         struct client_data *hash_table = internals->mode6.client_table;
110         struct client_data *client_info;
111
112         uint8_t hash_index;
113
114         arp = (struct arp_hdr *) ((char *) (eth_h + 1) + offset);
115
116         /* ARP Requests are forwarded to the application with no changes */
117         if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY))
118                 return;
119
120         /* From now on, we analyze only ARP Reply packets */
121         hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip,
122                         sizeof(arp->arp_data.arp_sip));
123         client_info = &hash_table[hash_index];
124
125         /*
126          * We got reply for ARP Request send by the application. We need to
127          * update client table when received data differ from what is stored
128          * in ALB table and issue sending update packet to that slave.
129          */
130         rte_spinlock_lock(&internals->mode6.lock);
131         if (client_info->in_use == 0 ||
132                         client_info->app_ip != arp->arp_data.arp_tip ||
133                         client_info->cli_ip != arp->arp_data.arp_sip ||
134                         !is_same_ether_addr(&client_info->cli_mac, &arp->arp_data.arp_sha) ||
135                         client_info->vlan_count != offset / sizeof(struct vlan_hdr) ||
136                         memcmp(client_info->vlan, eth_h + 1, offset) != 0
137         ) {
138                 client_info->in_use = 1;
139                 client_info->app_ip = arp->arp_data.arp_tip;
140                 client_info->cli_ip = arp->arp_data.arp_sip;
141                 ether_addr_copy(&arp->arp_data.arp_sha, &client_info->cli_mac);
142                 client_info->slave_idx = calculate_slave(internals);
143                 rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
144                 ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_tha);
145                 memcpy(client_info->vlan, eth_h + 1, offset);
146                 client_info->vlan_count = offset / sizeof(struct vlan_hdr);
147         }
148         internals->mode6.ntt = 1;
149         rte_spinlock_unlock(&internals->mode6.lock);
150 }
151
152 uint8_t
153 bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
154                 struct bond_dev_private *internals)
155 {
156         struct arp_hdr *arp;
157
158         struct client_data *hash_table = internals->mode6.client_table;
159         struct client_data *client_info;
160
161         uint8_t hash_index;
162
163         struct ether_addr bonding_mac;
164
165         arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
166
167         /*
168          * Traffic with src MAC other than bonding should be sent on
169          * current primary port.
170          */
171         rte_eth_macaddr_get(internals->port_id, &bonding_mac);
172         if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) {
173                 rte_eth_macaddr_get(internals->current_primary_port,
174                                 &arp->arp_data.arp_sha);
175                 return internals->current_primary_port;
176         }
177
178         hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip,
179                         sizeof(uint32_t));
180         client_info = &hash_table[hash_index];
181
182         rte_spinlock_lock(&internals->mode6.lock);
183         if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) {
184                 if (client_info->in_use) {
185                         if (client_info->app_ip == arp->arp_data.arp_sip &&
186                                 client_info->cli_ip == arp->arp_data.arp_tip) {
187                                 /* Entry is already assigned to this client */
188                                 if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) {
189                                         ether_addr_copy(&arp->arp_data.arp_tha,
190                                                         &client_info->cli_mac);
191                                 }
192                                 rte_eth_macaddr_get(client_info->slave_idx,
193                                                 &client_info->app_mac);
194                                 ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
195                                 memcpy(client_info->vlan, eth_h + 1, offset);
196                                 client_info->vlan_count = offset / sizeof(struct vlan_hdr);
197                                 rte_spinlock_unlock(&internals->mode6.lock);
198                                 return client_info->slave_idx;
199                         }
200                 }
201
202                 /* Assign new slave to this client and update src mac in ARP */
203                 client_info->in_use = 1;
204                 client_info->ntt = 0;
205                 client_info->app_ip = arp->arp_data.arp_sip;
206                 ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac);
207                 client_info->cli_ip = arp->arp_data.arp_tip;
208                 client_info->slave_idx = calculate_slave(internals);
209                 rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
210                 ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
211                 memcpy(client_info->vlan, eth_h + 1, offset);
212                 client_info->vlan_count = offset / sizeof(struct vlan_hdr);
213                 rte_spinlock_unlock(&internals->mode6.lock);
214                 return client_info->slave_idx;
215         }
216
217         /* If packet is not ARP Reply, send it on current primary port. */
218         rte_spinlock_unlock(&internals->mode6.lock);
219         rte_eth_macaddr_get(internals->current_primary_port,
220                         &arp->arp_data.arp_sha);
221         return internals->current_primary_port;
222 }
223
224 uint8_t
225 bond_mode_alb_arp_upd(struct client_data *client_info,
226                 struct rte_mbuf *pkt, struct bond_dev_private *internals)
227 {
228         struct ether_hdr *eth_h;
229         struct arp_hdr *arp_h;
230         uint8_t slave_idx;
231
232         rte_spinlock_lock(&internals->mode6.lock);
233         eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
234
235         ether_addr_copy(&client_info->app_mac, &eth_h->s_addr);
236         ether_addr_copy(&client_info->cli_mac, &eth_h->d_addr);
237         if (client_info->vlan_count > 0)
238                 eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
239         else
240                 eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP);
241
242         arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr)
243                         + client_info->vlan_count * sizeof(struct vlan_hdr));
244
245         memcpy(eth_h + 1, client_info->vlan,
246                         client_info->vlan_count * sizeof(struct vlan_hdr));
247
248         ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha);
249         arp_h->arp_data.arp_sip = client_info->app_ip;
250         ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha);
251         arp_h->arp_data.arp_tip = client_info->cli_ip;
252
253         arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER);
254         arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
255         arp_h->arp_hln = ETHER_ADDR_LEN;
256         arp_h->arp_pln = sizeof(uint32_t);
257         arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
258
259         slave_idx = client_info->slave_idx;
260         rte_spinlock_unlock(&internals->mode6.lock);
261
262         return slave_idx;
263 }
264
265 void
266 bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev)
267 {
268         struct bond_dev_private *internals = bond_dev->data->dev_private;
269         struct client_data *client_info;
270
271         int i;
272
273         /* If active slave count is 0, it's pointless to refresh alb table */
274         if (internals->active_slave_count <= 0)
275                 return;
276
277         rte_spinlock_lock(&internals->mode6.lock);
278         internals->mode6.last_slave = ALB_NULL_INDEX;
279
280         for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
281                 client_info = &internals->mode6.client_table[i];
282                 if (client_info->in_use) {
283                         client_info->slave_idx = calculate_slave(internals);
284                         rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
285                         internals->mode6.ntt = 1;
286                 }
287         }
288         rte_spinlock_unlock(&internals->mode6.lock);
289 }