net/bnxt: add HA support in ULP
[dpdk.git] / drivers / net / bnxt / tf_ulp / ulp_ha_mgr.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019-2021 Broadcom
3  * All rights reserved.
4  */
5
6 #include <rte_common.h>
7 #include <rte_cycles.h>
8 #include <rte_malloc.h>
9 #include <rte_log.h>
10 #include <rte_alarm.h>
11 #include "bnxt.h"
12 #include "bnxt_ulp.h"
13 #include "bnxt_tf_common.h"
14 #include "ulp_ha_mgr.h"
15 #include "ulp_flow_db.h"
16
/* File-local macros and constants; none of these are exported. */

/* Flag bit that timer start sets and timer cancel clears in the HA info. */
#define ULP_HA_TIMER_THREAD     (1 << 0)
/* Evaluates to 1 when the timer flag bit is set in (info)->flags, else 0. */
#define ULP_HA_TIMER_IS_RUNNING(info) \
        (((info)->flags & ULP_HA_TIMER_THREAD) != 0)
/* Alarm period, in seconds, used when the HA timer is armed. */
#define ULP_HA_TIMER_SEC 1
/* Delay between two polls of the HA state during close, in milliseconds. */
#define ULP_HA_WAIT_TIME (MS_PER_S / 10)
/* Total time close polls for the PRIM_RUN state, in milliseconds. */
#define ULP_HA_WAIT_TIMEOUT (MS_PER_S * 2)

/* Interface table entry through which the HA state is read and written. */
#define ULP_HA_IF_TBL_DIR       TF_DIR_RX
#define ULP_HA_IF_TBL_TYPE      TF_IF_TBL_TYPE_PROF_PARIF_ERR_ACT_REC_PTR
#define ULP_HA_IF_TBL_IDX 10
27
/* Forward declarations: timer helpers. */
static int32_t
ulp_ha_mgr_timer_start(struct bnxt_ulp_context *ulp_ctx);
static void
ulp_ha_mgr_timer_cancel(struct bnxt_ulp_context *ulp_ctx);
static void
ulp_ha_mgr_timer_cb(void *arg);

/* Forward declarations: setters for the app type, region and HA state. */
static int32_t
ulp_ha_mgr_app_type_set(struct bnxt_ulp_context *ulp_ctx,
                        enum ulp_ha_mgr_app_type app_type);
static int32_t
ulp_ha_mgr_region_set(struct bnxt_ulp_context *ulp_ctx,
                      enum ulp_ha_mgr_region region);
static int32_t
ulp_ha_mgr_state_set(struct bnxt_ulp_context *ulp_ctx,
                     enum ulp_ha_mgr_state state);
39
40 static int32_t
41 ulp_ha_mgr_state_set(struct bnxt_ulp_context *ulp_ctx,
42                      enum ulp_ha_mgr_state state)
43 {
44         struct tf_set_if_tbl_entry_parms set_parms = { 0 };
45         struct tf *tfp;
46         uint32_t val = 0;
47         int32_t rc = 0;
48
49         if (ulp_ctx == NULL) {
50                 BNXT_TF_DBG(ERR, "Invalid parms in state get.\n");
51                 return -EINVAL;
52         }
53         tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_NO);
54         if (tfp == NULL) {
55                 BNXT_TF_DBG(ERR, "Unable to get the TFP.\n");
56                 return -EINVAL;
57         }
58
59         val = (uint32_t)state;
60
61         set_parms.dir = ULP_HA_IF_TBL_DIR;
62         set_parms.type = ULP_HA_IF_TBL_TYPE;
63         set_parms.data = (uint8_t *)&val;
64         set_parms.data_sz_in_bytes = sizeof(val);
65         set_parms.idx = ULP_HA_IF_TBL_IDX;
66
67         rc = tf_set_if_tbl_entry(tfp, &set_parms);
68         if (rc)
69                 BNXT_TF_DBG(ERR, "Failed to write the HA state\n");
70
71         return rc;
72 }
73
74 static int32_t
75 ulp_ha_mgr_region_set(struct bnxt_ulp_context *ulp_ctx,
76                       enum ulp_ha_mgr_region region)
77 {
78         struct bnxt_ulp_ha_mgr_info *ha_info;
79
80         if (ulp_ctx == NULL) {
81                 BNXT_TF_DBG(ERR, "Invalid params in ha region get.\n");
82                 return -EINVAL;
83         }
84
85         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
86         if (ha_info == NULL) {
87                 BNXT_TF_DBG(ERR, "Unable to get ha info\n");
88                 return -EINVAL;
89         }
90         ha_info->region = region;
91
92         return 0;
93 }
94
95 static int32_t
96 ulp_ha_mgr_app_type_set(struct bnxt_ulp_context *ulp_ctx,
97                         enum ulp_ha_mgr_app_type app_type)
98 {
99         struct bnxt_ulp_ha_mgr_info *ha_info;
100
101         if (ulp_ctx == NULL) {
102                 BNXT_TF_DBG(ERR, "Invalid Parms.\n");
103                 return -EINVAL;
104         }
105
106         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
107         if (ha_info == NULL) {
108                 BNXT_TF_DBG(ERR, "Unable to get the ha info.\n");
109                 return -EINVAL;
110         }
111         ha_info->app_type = app_type;
112
113         return 0;
114 }
115
116 /*
117  * When a secondary opens, the timer is started and periodically checks for a
118  * close of the primary (state moved to SEC_TIMER_COPY).
119  * In SEC_TIMER_COPY:
120  * - The flow db must be locked to prevent flows from being added to the high
121  *   region during a move.
122  * - Move the high entries to low
123  * - Set the region to low for subsequent flows
124  * - Switch our persona to Primary
125  * - Set the state to Primary Run
126  * - Release the flow db lock for flows to continue
127  */
128 static void
129 ulp_ha_mgr_timer_cb(void *arg)
130 {
131         struct tf_move_tcam_shared_entries_parms mparms = { 0 };
132         struct bnxt_ulp_context *ulp_ctx;
133         enum ulp_ha_mgr_state curr_state;
134         struct tf *tfp;
135         int32_t rc;
136
137         ulp_ctx = (struct bnxt_ulp_context *)arg;
138         rc = ulp_ha_mgr_state_get(ulp_ctx, &curr_state);
139         if (rc) {
140                 /*
141                  * This shouldn't happen, if it does, resetart the timer
142                  * and try again next time.
143                  */
144                 BNXT_TF_DBG(ERR, "On HA CB:Failed(%d) to get state.\n", rc);
145                 goto cb_restart;
146         }
147         if (curr_state != ULP_HA_STATE_SEC_TIMER_COPY)
148                 goto cb_restart;
149
150         /* Protect the flow database during the copy */
151         if (bnxt_ulp_cntxt_acquire_fdb_lock(ulp_ctx)) {
152                 /* Should not fail, if we do, restart timer and try again */
153                 BNXT_TF_DBG(ERR, "Flow db lock acquire failed\n");
154                 goto cb_restart;
155         }
156         /* All paths after this point must release the fdb lock */
157
158         /* The Primary has issued a close and we are in the timer copy
159          * phase.  Become the new Primary, Set state to Primary Run and
160          * move WC entries to Low Region.
161          */
162         BNXT_TF_DBG(INFO, "On HA CB: Moving entries HI to LOW\n");
163         mparms.dir = TF_DIR_RX;
164         mparms.tcam_tbl_type = TF_TCAM_TBL_TYPE_WC_TCAM_HIGH;
165         tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_YES);
166         if (tfp == NULL) {
167                 BNXT_TF_DBG(ERR, "On HA CB: Unable to get the TFP.\n");
168                 goto unlock;
169         }
170
171         rc = tf_move_tcam_shared_entries(tfp, &mparms);
172         if (rc) {
173                 BNXT_TF_DBG(ERR, "On HA_CB: Failed to move entries\n");
174                 goto unlock;
175         }
176
177         ulp_ha_mgr_region_set(ulp_ctx, ULP_HA_REGION_LOW);
178         ulp_ha_mgr_app_type_set(ulp_ctx, ULP_HA_APP_TYPE_PRIM);
179         ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_RUN);
180         BNXT_TF_DBG(INFO, "On HA CB: SEC[SEC_TIMER_COPY] => PRIM[PRIM_RUN]\n");
181 unlock:
182         bnxt_ulp_cntxt_release_fdb_lock(ulp_ctx);
183         return;
184 cb_restart:
185         ulp_ha_mgr_timer_start(ulp_ctx);
186 }
187
188 static int32_t
189 ulp_ha_mgr_timer_start(struct bnxt_ulp_context *ulp_ctx)
190 {
191         struct bnxt_ulp_ha_mgr_info *ha_info;
192
193         if (ulp_ctx == NULL) {
194                 BNXT_TF_DBG(ERR, "Invalid parmsi for ha timer start.\n");
195                 return -EINVAL;
196         }
197
198         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
199
200         if (ha_info == NULL) {
201                 BNXT_TF_DBG(ERR, "Unable to get HA Info in timer start.\n");
202                 return -EINVAL;
203         }
204         ha_info->flags |= ULP_HA_TIMER_THREAD;
205         rte_eal_alarm_set(US_PER_S * ULP_HA_TIMER_SEC,
206                           ulp_ha_mgr_timer_cb,
207                           (void *)ulp_ctx);
208         return 0;
209 }
210
211 static void
212 ulp_ha_mgr_timer_cancel(struct bnxt_ulp_context *ulp_ctx)
213 {
214         struct bnxt_ulp_ha_mgr_info *ha_info;
215
216         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
217         if (ha_info == NULL) {
218                 BNXT_TF_DBG(ERR, "Unable to get ha info\n");
219                 return;
220         }
221
222         ha_info->flags &= ~ULP_HA_TIMER_THREAD;
223         rte_eal_alarm_cancel(ulp_ha_mgr_timer_cb, (void *)ulp_ctx);
224 }
225
226 int32_t
227 ulp_ha_mgr_init(struct bnxt_ulp_context *ulp_ctx)
228 {
229         struct bnxt_ulp_ha_mgr_info *ha_info;
230         int32_t rc;
231         ha_info = rte_zmalloc("ulp_ha_mgr_info", sizeof(*ha_info), 0);
232         if (!ha_info)
233                 return -ENOMEM;
234
235         /* Add the HA info tbl to the ulp context. */
236         bnxt_ulp_cntxt_ptr2_ha_info_set(ulp_ctx, ha_info);
237
238         rc = pthread_mutex_init(&ha_info->ha_lock, NULL);
239         if (rc) {
240                 PMD_DRV_LOG(ERR, "Failed to initialize ha mutex\n");
241                 goto cleanup;
242         }
243
244         return 0;
245 cleanup:
246         if (ha_info != NULL)
247                 ulp_ha_mgr_deinit(ulp_ctx);
248         return -ENOMEM;
249 }
250
251 void
252 ulp_ha_mgr_deinit(struct bnxt_ulp_context *ulp_ctx)
253 {
254         struct bnxt_ulp_ha_mgr_info *ha_info;
255
256         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
257         if (ha_info == NULL) {
258                 BNXT_TF_DBG(ERR, "Unable to get HA Info for deinit.\n");
259                 return;
260         }
261
262         pthread_mutex_destroy(&ha_info->ha_lock);
263         rte_free(ha_info);
264
265         bnxt_ulp_cntxt_ptr2_ha_info_set(ulp_ctx, NULL);
266 }
267
268 int32_t
269 ulp_ha_mgr_app_type_get(struct bnxt_ulp_context *ulp_ctx,
270                         enum ulp_ha_mgr_app_type *app_type)
271 {
272         struct bnxt_ulp_ha_mgr_info *ha_info;
273
274         if (ulp_ctx == NULL || app_type == NULL) {
275                 BNXT_TF_DBG(ERR, "Invalid Parms.\n");
276                 return -EINVAL;
277         }
278
279         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
280         if (ha_info == NULL) {
281                 BNXT_TF_DBG(ERR, "Unable to get the HA info.\n");
282                 return -EINVAL;
283         }
284         *app_type = ha_info->app_type;
285
286         return 0;
287 }
288
289 int32_t
290 ulp_ha_mgr_state_get(struct bnxt_ulp_context *ulp_ctx,
291                      enum ulp_ha_mgr_state *state)
292 {
293         struct tf_get_if_tbl_entry_parms get_parms = { 0 };
294         struct tf *tfp;
295         uint32_t val = 0;
296         int32_t rc = 0;
297
298         if (ulp_ctx == NULL || state == NULL) {
299                 BNXT_TF_DBG(ERR, "Invalid parms in state get.\n");
300                 return -EINVAL;
301         }
302         tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_NO);
303         if (tfp == NULL) {
304                 BNXT_TF_DBG(ERR, "Unable to get the TFP.\n");
305                 return -EINVAL;
306         }
307
308         get_parms.dir = ULP_HA_IF_TBL_DIR;
309         get_parms.type = ULP_HA_IF_TBL_TYPE;
310         get_parms.idx = ULP_HA_IF_TBL_IDX;
311         get_parms.data = (uint8_t *)&val;
312         get_parms.data_sz_in_bytes = sizeof(val);
313
314         rc = tf_get_if_tbl_entry(tfp, &get_parms);
315         if (rc)
316                 BNXT_TF_DBG(ERR, "Failed to read the HA state\n");
317
318         *state = val;
319         return rc;
320 }
321
322 int32_t
323 ulp_ha_mgr_open(struct bnxt_ulp_context *ulp_ctx)
324 {
325         enum ulp_ha_mgr_state curr_state;
326         int32_t rc;
327
328         rc = ulp_ha_mgr_state_get(ulp_ctx, &curr_state);
329         if (rc) {
330                 BNXT_TF_DBG(ERR, "Failed to get HA state on Open (%d)\n", rc);
331                 return -EINVAL;
332         }
333
334         /*
335          * An Open can only occur during the Init and Primary Run states. During
336          * Init, the system attempting to Open will become the only system
337          * running. During Primary Run, the system attempting to Open will
338          * become the secondary system temporarily, and should eventually be
339          * transitioned to the primary system.
340          */
341         switch (curr_state) {
342         case ULP_HA_STATE_INIT:
343                 /*
344                  * No system is running, as we are the primary.  Since no other
345                  * system is running, we start writing into the low region.  By
346                  * writing into the low region, we save room for the secondary
347                  * system to override our entries by using the high region.
348                  */
349                 ulp_ha_mgr_app_type_set(ulp_ctx, ULP_HA_APP_TYPE_PRIM);
350                 ulp_ha_mgr_region_set(ulp_ctx, ULP_HA_REGION_LOW);
351                 rc = ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_RUN);
352                 if (rc) {
353                         BNXT_TF_DBG(ERR, "On Open: Failed to set PRIM_RUN.\n");
354                         return -EINVAL;
355                 }
356
357                 BNXT_TF_DBG(INFO, "On Open: [INIT] => PRIM[PRIM_RUN]\n");
358                 break;
359         case ULP_HA_STATE_PRIM_RUN:
360                 /*
361                  * The secondary system is starting in order to take over.
362                  * The current primary is expected to eventually close and pass
363                  * full control to this system;however, until the primary closes
364                  * both are operational.
365                  *
366                  * The timer is started in order to determine when the
367                  * primary has closed.
368                  */
369                 ulp_ha_mgr_app_type_set(ulp_ctx, ULP_HA_APP_TYPE_SEC);
370                 ulp_ha_mgr_region_set(ulp_ctx, ULP_HA_REGION_HI);
371
372                 /*
373                  * TODO:
374                  * Clear the high region so the secondary can begin overriding
375                  * the current entries.
376                  */
377                 rc = ulp_ha_mgr_timer_start(ulp_ctx);
378                 if (rc) {
379                         BNXT_TF_DBG(ERR, "Unable to start timer on HA Open.\n");
380                         return -EINVAL;
381                 }
382
383                 rc = ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_SEC_RUN);
384                 if (rc) {
385                         BNXT_TF_DBG(ERR, "On Open: Failed to set PRIM_SEC_RUN\n");
386                         return -EINVAL;
387                 }
388                 BNXT_TF_DBG(INFO, "On Open: [PRIM_RUN] => [PRIM_SEC_RUN]\n");
389                 break;
390         default:
391                 BNXT_TF_DBG(ERR, "On Open: Unknown state 0x%x\n", curr_state);
392                 return -EINVAL;
393         }
394
395         return 0;
396 }
397
398 int32_t
399 ulp_ha_mgr_close(struct bnxt_ulp_context *ulp_ctx)
400 {
401         enum ulp_ha_mgr_state curr_state, next_state, poll_state;
402         enum ulp_ha_mgr_app_type app_type;
403         int32_t timeout;
404         int32_t rc;
405
406         rc = ulp_ha_mgr_state_get(ulp_ctx, &curr_state);
407         if (rc) {
408                 BNXT_TF_DBG(ERR, "On Close: Failed(%d) to get HA state\n", rc);
409                 return -EINVAL;
410         }
411
412         rc = ulp_ha_mgr_app_type_get(ulp_ctx, &app_type);
413         if (rc) {
414                 BNXT_TF_DBG(ERR, "On Close: Failed to get the app type.\n");
415                 return -EINVAL;
416         }
417
418         if (curr_state == ULP_HA_STATE_PRIM_RUN &&
419             app_type == ULP_HA_APP_TYPE_PRIM) {
420                 /*
421                  * Only the primary is running, so a close effectively moves the
422                  * system back to INIT.
423                  */
424                 next_state = ULP_HA_STATE_INIT;
425                 ulp_ha_mgr_state_set(ulp_ctx, next_state);
426                 BNXT_TF_DBG(INFO, "On Close: PRIM[PRIM_RUN] => [INIT]\n");
427         } else if (curr_state == ULP_HA_STATE_PRIM_SEC_RUN &&
428                   app_type == ULP_HA_APP_TYPE_PRIM) {
429                 /*
430                  * While both are running, the primary received a close.
431                  * Cleanup the flows, set the COPY state, and wait for the
432                  * secondary to become the Primary.
433                  */
434                 BNXT_TF_DBG(INFO,
435                             "On Close: PRIM[PRIM_SEC_RUN] flushing flows.\n");
436
437                 ulp_flow_db_flush_flows(ulp_ctx, BNXT_ULP_FDB_TYPE_REGULAR);
438                 ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_SEC_TIMER_COPY);
439
440                 /*
441                  * TODO: This needs to be bounded in case the other system does
442                  * not move to PRIM_RUN.
443                  */
444                 BNXT_TF_DBG(INFO,
445                             "On Close: PRIM[PRIM_SEC_RUN] => [Copy], enter wait.\n");
446                 timeout = ULP_HA_WAIT_TIMEOUT;
447                 do {
448                         rte_delay_ms(ULP_HA_WAIT_TIME);
449                         rc = ulp_ha_mgr_state_get(ulp_ctx, &poll_state);
450                         if (rc) {
451                                 BNXT_TF_DBG(ERR,
452                                             "Failed to get HA state on Close (%d)\n",
453                                             rc);
454                                 goto cleanup;
455                         }
456                         timeout -= ULP_HA_WAIT_TIME;
457                         BNXT_TF_DBG(INFO,
458                                     "On Close: Waiting %d ms for PRIM_RUN\n",
459                                     timeout);
460                 } while (poll_state != ULP_HA_STATE_PRIM_RUN && timeout > 0);
461
462                 if (timeout <= 0) {
463                         BNXT_TF_DBG(ERR, "On Close: SEC[COPY] Timed out\n");
464                         goto cleanup;
465                 }
466
467                 BNXT_TF_DBG(INFO, "On Close: PRIM[PRIM_SEC_RUN] => [COPY]\n");
468         } else if (curr_state == ULP_HA_STATE_PRIM_SEC_RUN &&
469                    app_type == ULP_HA_APP_TYPE_SEC) {
470                 /*
471                  * While both are running, the secondary unexpectedly received a
472                  * close.  Cancel the timer, set the state to Primary RUN since
473                  * it is the only one running.
474                  */
475                 ulp_ha_mgr_timer_cancel(ulp_ctx);
476                 ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_RUN);
477
478                 BNXT_TF_DBG(INFO, "On Close: SEC[PRIM_SEC_RUN] => [PRIM_RUN]\n");
479         } else if (curr_state == ULP_HA_STATE_SEC_TIMER_COPY &&
480                    app_type == ULP_HA_APP_TYPE_SEC) {
481                 /*
482                  * While both were running and the Secondary went into copy,
483                  * secondary received a close.  Wait until the former Primary
484                  * clears the copy stage, close, and set to INIT.
485                  */
486                 BNXT_TF_DBG(INFO, "On Close: SEC[COPY] wait for PRIM_RUN\n");
487
488                 timeout = ULP_HA_WAIT_TIMEOUT;
489                 do {
490                         rte_delay_ms(ULP_HA_WAIT_TIME);
491                         rc = ulp_ha_mgr_state_get(ulp_ctx, &poll_state);
492                         if (rc) {
493                                 BNXT_TF_DBG(ERR,
494                                             "Failed to get HA state on Close (%d)\n",
495                                             rc);
496                                 goto cleanup;
497                         }
498
499                         timeout -= ULP_HA_WAIT_TIME;
500                         BNXT_TF_DBG(INFO,
501                                     "On Close: Waiting %d ms for PRIM_RUN\n",
502                                     timeout);
503                 } while (poll_state != ULP_HA_STATE_PRIM_RUN &&
504                          timeout >= 0);
505
506                 if (timeout <= 0) {
507                         BNXT_TF_DBG(ERR,
508                                     "On Close: SEC[COPY] Timed out\n");
509                         goto cleanup;
510                 }
511
512                 next_state = ULP_HA_STATE_INIT;
513                 rc = ulp_ha_mgr_state_set(ulp_ctx, next_state);
514                 if (rc) {
515                         BNXT_TF_DBG(ERR,
516                                     "On Close: Failed to set state to INIT(%x)\n",
517                                     rc);
518                         goto cleanup;
519                 }
520
521                 BNXT_TF_DBG(INFO,
522                             "On Close: SEC[COPY] => [INIT] after %d ms\n",
523                             ULP_HA_WAIT_TIMEOUT - timeout);
524         } else {
525                 BNXT_TF_DBG(ERR, "On Close: Invalid type/state %d/%d\n",
526                             curr_state, app_type);
527         }
528 cleanup:
529         return rc;
530 }
531
532 int32_t
533 ulp_ha_mgr_region_get(struct bnxt_ulp_context *ulp_ctx,
534                       enum ulp_ha_mgr_region *region)
535 {
536         struct bnxt_ulp_ha_mgr_info *ha_info;
537
538         if (ulp_ctx == NULL || region == NULL) {
539                 BNXT_TF_DBG(ERR, "Invalid params in ha region get.\n");
540                 return -EINVAL;
541         }
542
543         ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
544         if (ha_info == NULL) {
545                 BNXT_TF_DBG(ERR, "Unable to get ha info\n");
546                 return -EINVAL;
547         }
548         *region = ha_info->region;
549
550         return 0;
551 }