1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2019-2021 Broadcom
6 #include <rte_common.h>
7 #include <rte_cycles.h>
8 #include <rte_malloc.h>
10 #include <rte_alarm.h>
13 #include "bnxt_tf_common.h"
14 #include "ulp_ha_mgr.h"
15 #include "ulp_flow_db.h"
17 /* Local only MACROs and defines that aren't exported */
/* Flag bit tested by ULP_HA_TIMER_IS_RUNNING. */
18 #define ULP_HA_TIMER_THREAD (1 << 0)
/* Yields 1 when ULP_HA_TIMER_THREAD is set in (info)->flags, else 0. */
19 #define ULP_HA_TIMER_IS_RUNNING(info) (!!((info)->flags & ULP_HA_TIMER_THREAD))
/* Scaled by US_PER_S to form the delay given to rte_eal_alarm_set(). */
20 #define ULP_HA_TIMER_SEC 1
/* Per-iteration rte_delay_ms() value in the ulp_ha_mgr_close() wait loops. */
21 #define ULP_HA_WAIT_TIME (MS_PER_S / 10)
/* Starting budget for those wait loops; reduced by ULP_HA_WAIT_TIME per poll. */
22 #define ULP_HA_WAIT_TIMEOUT (MS_PER_S * 2)
/* Direction and table type used for every interface table access in this file. */
24 #define ULP_HA_IF_TBL_DIR TF_DIR_RX
25 #define ULP_HA_IF_TBL_TYPE TF_IF_TBL_TYPE_PROF_PARIF_ERR_ACT_REC_PTR
/* Interface table index used by ulp_ha_mgr_state_set() and ulp_ha_mgr_state_get(). */
26 #define ULP_HA_IF_TBL_IDX 10
/* Interface table index read by ulp_ha_mgr_tf_client_num_get(). */
27 #define ULP_HA_CLIENT_CNT_IF_TBL_IDX 9
/*
 * Forward declarations of helpers that are defined later in this file.
 *
 * NOTE(review): the embedded numbering skips 34, 37, 40 and 41, so the
 * specifier lines that should precede the last three prototypes are not
 * visible in this listing; confirm them against the complete file.
 */
29 static void ulp_ha_mgr_timer_cancel(void);
30 static int32_t ulp_ha_mgr_timer_start(void);
31 static void ulp_ha_mgr_timer_cb(void *arg);
32 static int32_t ulp_ha_mgr_app_type_set(struct bnxt_ulp_context *ulp_ctx,
33 enum ulp_ha_mgr_app_type app_type);
35 ulp_ha_mgr_region_set(struct bnxt_ulp_context *ulp_ctx,
36 enum ulp_ha_mgr_region region);
38 ulp_ha_mgr_state_set(struct bnxt_ulp_context *ulp_ctx,
39 enum ulp_ha_mgr_state state);
42 ulp_ha_mgr_tf_client_num_get(struct bnxt_ulp_context *ulp_ctx, uint32_t *cnt);
/*
 * Write an HA state value into the interface table.
 *
 * The state is converted to a 32-bit value and written with
 * tf_set_if_tbl_entry() using ULP_HA_IF_TBL_DIR, ULP_HA_IF_TBL_TYPE and
 * ULP_HA_IF_TBL_IDX. The tfp is looked up with BNXT_ULP_SHARED_SESSION_NO.
 *
 * ulp_ctx: ULP context; a NULL value is logged as an error.
 * state:   HA state value to store.
 *
 * NOTE(review): the embedded numbering skips lines (47, 49-52, 55-56, 58,
 * 60-62, 72, 74 onward). The declarations of tfp, val and rc, the checks on
 * tfp and rc, and every return statement are not visible here, so the
 * returned values cannot be confirmed from this listing.
 * NOTE(review): the NULL-context log text mentions state get although this
 * is the setter; looks like a copy and paste slip, confirm before changing.
 */
45 ulp_ha_mgr_state_set(struct bnxt_ulp_context *ulp_ctx,
46 enum ulp_ha_mgr_state state)
48 struct tf_set_if_tbl_entry_parms set_parms = { 0 };
53 if (ulp_ctx == NULL) {
54 BNXT_TF_DBG(ERR, "Invalid parms in state get.\n");
57 tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_NO);
59 BNXT_TF_DBG(ERR, "Unable to get the TFP.\n");
/* set_parms.data points at val, so the write below sends these bytes. */
63 val = (uint32_t)state;
65 set_parms.dir = ULP_HA_IF_TBL_DIR;
66 set_parms.type = ULP_HA_IF_TBL_TYPE;
67 set_parms.data = (uint8_t *)&val;
68 set_parms.data_sz_in_bytes = sizeof(val);
69 set_parms.idx = ULP_HA_IF_TBL_IDX;
71 rc = tf_set_if_tbl_entry(tfp, &set_parms);
73 BNXT_TF_DBG(ERR, "Failed to write the HA state\n");
/*
 * Read the HA client count entry from the interface table.
 *
 * Issues tf_get_if_tbl_entry() with ULP_HA_IF_TBL_DIR, ULP_HA_IF_TBL_TYPE
 * and ULP_HA_CLIENT_CNT_IF_TBL_IDX, with the result buffer pointing at the
 * local val. The tfp is looked up with BNXT_ULP_SHARED_SESSION_NO.
 *
 * ulp_ctx: ULP context; NULL is logged as an error.
 * cnt:     output pointer; NULL is logged as an error.
 *
 * NOTE(review): the embedded numbering skips lines (80-81, 83-86, 89-90,
 * 92, 94-96, 102, 104, 106 onward). The second parameter line, the local
 * declarations, the store through cnt and the return statements are not
 * visible here; presumably val is copied to *cnt on success, confirm
 * against the complete file.
 */
79 ulp_ha_mgr_tf_client_num_get(struct bnxt_ulp_context *ulp_ctx,
82 struct tf_get_if_tbl_entry_parms get_parms = { 0 };
87 if (ulp_ctx == NULL || cnt == NULL) {
88 BNXT_TF_DBG(ERR, "Invalid parms in client num get.\n");
91 tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_NO);
93 BNXT_TF_DBG(ERR, "Unable to get the TFP.\n");
97 get_parms.dir = ULP_HA_IF_TBL_DIR;
98 get_parms.type = ULP_HA_IF_TBL_TYPE;
99 get_parms.idx = ULP_HA_CLIENT_CNT_IF_TBL_IDX;
100 get_parms.data = (uint8_t *)&val;
101 get_parms.data_sz_in_bytes = sizeof(val);
103 rc = tf_get_if_tbl_entry(tfp, &get_parms);
105 BNXT_TF_DBG(ERR, "Failed to read the number of HA clients\n");
/*
 * Record the region in the HA info attached to the ULP context.
 *
 * The HA info is fetched with bnxt_ulp_cntxt_ptr2_ha_info_get() and its
 * region field is overwritten with the supplied value.
 *
 * ulp_ctx: ULP context; NULL is logged as an error.
 * region:  region value to store.
 *
 * NOTE(review): the embedded numbering skips lines (114, 116, 119-121,
 * 125-126, 128 onward), so the return statements are not visible here.
 * NOTE(review): the NULL-context log text mentions region get although
 * this is the setter; confirm before changing.
 */
112 ulp_ha_mgr_region_set(struct bnxt_ulp_context *ulp_ctx,
113 enum ulp_ha_mgr_region region)
115 struct bnxt_ulp_ha_mgr_info *ha_info;
117 if (ulp_ctx == NULL) {
118 BNXT_TF_DBG(ERR, "Invalid params in ha region get.\n");
122 ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
123 if (ha_info == NULL) {
124 BNXT_TF_DBG(ERR, "Unable to get ha info\n");
127 ha_info->region = region;
/*
 * Record the application type in the HA info attached to the ULP context.
 *
 * The HA info is fetched with bnxt_ulp_cntxt_ptr2_ha_info_get() and its
 * app_type field is overwritten with the supplied value.
 *
 * ulp_ctx:  ULP context; NULL is logged as an error.
 * app_type: application type value to store.
 *
 * NOTE(review): the embedded numbering skips lines (135, 137, 140-142,
 * 146-147, 149 onward), so the return statements are not visible here.
 */
133 ulp_ha_mgr_app_type_set(struct bnxt_ulp_context *ulp_ctx,
134 enum ulp_ha_mgr_app_type app_type)
136 struct bnxt_ulp_ha_mgr_info *ha_info;
138 if (ulp_ctx == NULL) {
139 BNXT_TF_DBG(ERR, "Invalid Parms.\n");
143 ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
144 if (ha_info == NULL) {
145 BNXT_TF_DBG(ERR, "Unable to get the ha info.\n");
148 ha_info->app_type = app_type;
/*
 * Alarm callback that inspects and advances the HA state.
 *
 * Visible flow:
 *  - Acquire the ULP context with bnxt_ulp_cntxt_entry_acquire(); when that
 *    yields NULL, ulp_ha_mgr_timer_start() is called.
 *  - Read the local shared client count, the tfp for
 *    BNXT_ULP_SHARED_SESSION_YES, the current HA state, the client count
 *    stored in the interface table, and the local app type.
 *  - When the table client count equals the local count and the state is
 *    PRIM_SEC_RUN: as PRIM, write state PRIM_RUN and clear the RX
 *    WC_TCAM_HIGH shared entries; as SEC, write state SEC_TIMER_COPY and
 *    continue with that state.
 *  - Check for state SEC_TIMER_COPY with app type SEC, then take the flow
 *    db lock, clear the RX WC_TCAM_LOW shared entries, move the RX
 *    WC_TCAM_HIGH shared entries, record region LOW, app type PRIM and
 *    state PRIM_RUN, and release the flow db lock.
 *  - Release the context entry and call ulp_ha_mgr_timer_start() again.
 *
 * arg: unused (marked __rte_unused).
 *
 * NOTE(review): the embedded numbering skips many lines in this function.
 * The declarations of tfp and rc, every check on rc, the goto or return
 * statements, the labels, and several comment delimiters are not visible,
 * so it cannot be confirmed from this listing which failure paths skip the
 * lock release, the context release or the timer restart.
 * NOTE(review): the results of the three final setters (region, app type,
 * state) are not assigned to anything in the visible lines.
 */
154 ulp_ha_mgr_timer_cb(void *arg __rte_unused)
156 struct tf_move_tcam_shared_entries_parms mparms = { 0 };
157 struct tf_clear_tcam_shared_entries_parms cparms = { 0 };
158 struct bnxt_ulp_context *ulp_ctx;
159 enum ulp_ha_mgr_state curr_state;
160 enum ulp_ha_mgr_app_type app_type;
161 uint8_t myclient_cnt = 0;
162 uint32_t client_cnt = 0;
/* With no context available the only visible action is re-arming the timer. */
166 ulp_ctx = bnxt_ulp_cntxt_entry_acquire();
167 if (ulp_ctx == NULL) {
168 ulp_ha_mgr_timer_start();
172 myclient_cnt = bnxt_ulp_cntxt_num_shared_clients_get(ulp_ctx);
173 if (myclient_cnt == 0) {
175 "PANIC Client Count is zero kill timer\n.");
/* TCAM clear and move calls below use this shared-session tfp. */
179 tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_YES);
181 BNXT_TF_DBG(ERR, "Unable to get the TFP.\n");
185 rc = ulp_ha_mgr_state_get(ulp_ctx, &curr_state);
188 * This shouldn't happen, if it does, reset the timer
189 * and try again next time.
191 BNXT_TF_DBG(ERR, "Failed(%d) to get state.\n",
196 rc = ulp_ha_mgr_tf_client_num_get(ulp_ctx, &client_cnt);
198 BNXT_TF_DBG(ERR, "Failed(%d) to get cnt.\n",
203 rc = ulp_ha_mgr_app_type_get(ulp_ctx, &app_type);
205 BNXT_TF_DBG(ERR, "Failed(%d) to get type.\n",
210 /* Handle the Cleanup if an app went away */
211 if (client_cnt == myclient_cnt) {
212 if (curr_state == ULP_HA_STATE_PRIM_SEC_RUN &&
213 app_type == ULP_HA_APP_TYPE_PRIM) {
215 * The SECONDARY went away:
216 * 1. Set the state to PRIM_RUN
217 * 2. Clear the High region so our TCAM will hit.
219 rc = ulp_ha_mgr_state_set(ulp_ctx,
220 ULP_HA_STATE_PRIM_RUN);
223 "On HA CB:Failed(%d) to set state\n",
228 cparms.dir = TF_DIR_RX;
229 cparms.tcam_tbl_type =
230 TF_TCAM_TBL_TYPE_WC_TCAM_HIGH;
231 rc = tf_clear_tcam_shared_entries(tfp, &cparms);
234 "On HA CB:Failed(%d) clear tcam\n",
238 } else if (curr_state == ULP_HA_STATE_PRIM_SEC_RUN &&
239 app_type == ULP_HA_APP_TYPE_SEC) {
241 * The PRIMARY went away:
242 * 1. Set the state to SEC_COPY
243 * 2. Clear the Low Region for the next copy
245 rc = ulp_ha_mgr_state_set(ulp_ctx,
246 ULP_HA_STATE_SEC_TIMER_COPY);
249 "On HA CB:Failed(%d) to set state\n",
/* Keep the local copy in step with the state that was just written. */
253 curr_state = ULP_HA_STATE_SEC_TIMER_COPY;
257 /* Only the Secondary has work to do on SEC_TIMER_COPY */
258 if (curr_state != ULP_HA_STATE_SEC_TIMER_COPY ||
259 app_type != ULP_HA_APP_TYPE_SEC)
262 /* Protect the flow database during the copy */
263 if (bnxt_ulp_cntxt_acquire_fdb_lock(ulp_ctx)) {
264 /* Should not fail, if we do, restart timer and try again */
265 BNXT_TF_DBG(ERR, "Flow db lock acquire failed\n");
268 /* All paths after this point must release the fdb lock */
270 /* The Primary has issued a close and we are in the timer copy
271 * phase. Become the new Primary, Set state to Primary Run and
272 * move WC entries to Low Region.
274 BNXT_TF_DBG(INFO, "On HA CB: Moving entries HI to LOW\n");
276 cparms.dir = TF_DIR_RX;
277 cparms.tcam_tbl_type = TF_TCAM_TBL_TYPE_WC_TCAM_LOW;
278 rc = tf_clear_tcam_shared_entries(tfp, &cparms);
281 "On HA CB:Failed(%d) clear tcam low\n",
286 mparms.dir = TF_DIR_RX;
287 mparms.tcam_tbl_type = TF_TCAM_TBL_TYPE_WC_TCAM_HIGH;
288 rc = tf_move_tcam_shared_entries(tfp, &mparms);
290 BNXT_TF_DBG(ERR, "On HA_CB: Failed to move entries\n");
294 ulp_ha_mgr_region_set(ulp_ctx, ULP_HA_REGION_LOW);
295 ulp_ha_mgr_app_type_set(ulp_ctx, ULP_HA_APP_TYPE_PRIM);
296 ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_RUN);
297 BNXT_TF_DBG(INFO, "On HA CB: SEC[SEC_TIMER_COPY] => PRIM[PRIM_RUN]\n");
299 bnxt_ulp_cntxt_release_fdb_lock(ulp_ctx);
301 bnxt_ulp_cntxt_entry_release();
302 ulp_ha_mgr_timer_start();
/*
 * Schedule ulp_ha_mgr_timer_cb() through rte_eal_alarm_set() with a delay of
 * US_PER_S * ULP_HA_TIMER_SEC and a NULL callback argument.
 *
 * NOTE(review): the prototype earlier in the file declares an int32_t
 * result, but the embedded numbering skips the lines around the call (305,
 * 307, 310 onward), so the return statement is not visible here and it
 * cannot be confirmed whether the rte_eal_alarm_set() result is propagated.
 */
306 ulp_ha_mgr_timer_start(void)
308 rte_eal_alarm_set(US_PER_S * ULP_HA_TIMER_SEC,
309 ulp_ha_mgr_timer_cb, NULL);
/*
 * Cancel the pending ulp_ha_mgr_timer_cb() alarm that was registered with a
 * NULL argument, using rte_eal_alarm_cancel(). The result of that call is
 * not used in the visible lines.
 */
314 ulp_ha_mgr_timer_cancel(void)
316 rte_eal_alarm_cancel(ulp_ha_mgr_timer_cb, (void *)NULL);
/*
 * Set up the HA manager for a ULP context.
 *
 * Visible steps: allocate a zeroed bnxt_ulp_ha_mgr_info with rte_zmalloc(),
 * attach it to the context with bnxt_ulp_cntxt_ptr2_ha_info_set(),
 * initialise its ha_lock mutex, and start the HA timer. A later line calls
 * ulp_ha_mgr_deinit() on the same context.
 *
 * ulp_ctx: ULP context that receives the HA info.
 *
 * NOTE(review): the embedded numbering skips lines (321, 323, 325-327, 330,
 * 332, 334-335, 337, 339-344, 346 onward). The allocation check, the checks
 * on rc, the label that leads to the ulp_ha_mgr_deinit() call and the
 * return statements are not visible; presumably the deinit call is the
 * failure cleanup path, confirm against the complete file.
 */
320 ulp_ha_mgr_init(struct bnxt_ulp_context *ulp_ctx)
322 struct bnxt_ulp_ha_mgr_info *ha_info;
324 ha_info = rte_zmalloc("ulp_ha_mgr_info", sizeof(*ha_info), 0);
328 /* Add the HA info tbl to the ulp context. */
329 bnxt_ulp_cntxt_ptr2_ha_info_set(ulp_ctx, ha_info);
331 rc = pthread_mutex_init(&ha_info->ha_lock, NULL);
333 PMD_DRV_LOG(ERR, "Failed to initialize ha mutex\n");
336 rc = ulp_ha_mgr_timer_start();
338 BNXT_TF_DBG(ERR, "Unable to start timer CB.\n");
345 ulp_ha_mgr_deinit(ulp_ctx);
/*
 * Tear down the HA manager for a ULP context.
 *
 * Visible steps: cancel the HA timer, fetch the HA info from the context
 * (logging an error when it is NULL), destroy its ha_lock mutex, and clear
 * the HA info pointer stored in the context.
 *
 * ulp_ctx: ULP context whose HA info is being removed.
 *
 * NOTE(review): the embedded numbering skips lines (351, 353, 355, 359-361,
 * 363-364, 366 onward). Whether the HA info memory is released and what is
 * returned cannot be confirmed from this listing.
 */
350 ulp_ha_mgr_deinit(struct bnxt_ulp_context *ulp_ctx)
352 struct bnxt_ulp_ha_mgr_info *ha_info;
/* The timer is cancelled before the HA info is looked up. */
354 ulp_ha_mgr_timer_cancel();
356 ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
357 if (ha_info == NULL) {
358 BNXT_TF_DBG(ERR, "Unable to get HA Info for deinit.\n");
362 pthread_mutex_destroy(&ha_info->ha_lock);
365 bnxt_ulp_cntxt_ptr2_ha_info_set(ulp_ctx, NULL);
/*
 * Report the application type stored in the HA info of the ULP context.
 *
 * The HA info is fetched with bnxt_ulp_cntxt_ptr2_ha_info_get() and its
 * app_type field is copied to *app_type.
 *
 * ulp_ctx:  ULP context; NULL is logged as an error.
 * app_type: output pointer; NULL is logged as an error.
 *
 * NOTE(review): the embedded numbering skips lines (371, 373, 376-378,
 * 382-383, 385 onward), so the return statements are not visible here.
 */
369 ulp_ha_mgr_app_type_get(struct bnxt_ulp_context *ulp_ctx,
370 enum ulp_ha_mgr_app_type *app_type)
372 struct bnxt_ulp_ha_mgr_info *ha_info;
374 if (ulp_ctx == NULL || app_type == NULL) {
375 BNXT_TF_DBG(ERR, "Invalid Parms.\n");
379 ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
380 if (ha_info == NULL) {
381 BNXT_TF_DBG(ERR, "Unable to get the HA info.\n");
384 *app_type = ha_info->app_type;
/*
 * Read the HA state entry from the interface table.
 *
 * Issues tf_get_if_tbl_entry() with ULP_HA_IF_TBL_DIR, ULP_HA_IF_TBL_TYPE
 * and ULP_HA_IF_TBL_IDX, with the result buffer pointing at the local val.
 * The tfp is looked up with BNXT_ULP_SHARED_SESSION_NO.
 *
 * ulp_ctx: ULP context; NULL is logged as an error.
 * state:   output pointer; NULL is logged as an error.
 *
 * NOTE(review): the embedded numbering skips lines (392, 394-397, 400-401,
 * 403, 405-407, 413, 415, 417 onward). The local declarations, the store
 * through state and the return statements are not visible here; presumably
 * val is copied to *state on success, confirm against the complete file.
 */
390 ulp_ha_mgr_state_get(struct bnxt_ulp_context *ulp_ctx,
391 enum ulp_ha_mgr_state *state)
393 struct tf_get_if_tbl_entry_parms get_parms = { 0 };
398 if (ulp_ctx == NULL || state == NULL) {
399 BNXT_TF_DBG(ERR, "Invalid parms in state get.\n");
402 tfp = bnxt_ulp_cntxt_tfp_get(ulp_ctx, BNXT_ULP_SHARED_SESSION_NO);
404 BNXT_TF_DBG(ERR, "Unable to get the TFP.\n");
408 get_parms.dir = ULP_HA_IF_TBL_DIR;
409 get_parms.type = ULP_HA_IF_TBL_TYPE;
410 get_parms.idx = ULP_HA_IF_TBL_IDX;
411 get_parms.data = (uint8_t *)&val;
412 get_parms.data_sz_in_bytes = sizeof(val);
414 rc = tf_get_if_tbl_entry(tfp, &get_parms);
416 BNXT_TF_DBG(ERR, "Failed to read the HA state\n");
/*
 * Handle an open by choosing the local role from the current HA state.
 *
 * Visible cases of the switch on the state read with ulp_ha_mgr_state_get():
 *  - ULP_HA_STATE_INIT: record app type PRIM and region LOW, then write
 *    state PRIM_RUN.
 *  - ULP_HA_STATE_PRIM_RUN: record app type SEC and region HI, then write
 *    state PRIM_SEC_RUN.
 *  - A further path logs the state as unknown.
 *
 * ulp_ctx: ULP context to operate on.
 *
 * NOTE(review): the embedded numbering skips lines throughout (424,
 * 426-427, 429, 431-434, 452, 454-456, 458, 470, 472-473, 475-476, 478
 * onward). The rc declaration and checks, the break statements, the default
 * label and the return statements are not visible in this listing.
 * NOTE(review): the results of the app type and region setters are not
 * assigned to anything in the visible lines.
 */
423 ulp_ha_mgr_open(struct bnxt_ulp_context *ulp_ctx)
425 enum ulp_ha_mgr_state curr_state;
428 rc = ulp_ha_mgr_state_get(ulp_ctx, &curr_state);
430 BNXT_TF_DBG(ERR, "Failed to get HA state on Open (%d)\n", rc);
435 * An Open can only occur during the Init and Primary Run states. During
436 * Init, the system attempting to Open will become the only system
437 * running. During Primary Run, the system attempting to Open will
438 * become the secondary system temporarily, and should eventually be
439 * transitioned to the primary system.
441 switch (curr_state) {
442 case ULP_HA_STATE_INIT:
444 * No system is running, as we are the primary. Since no other
445 * system is running, we start writing into the low region. By
446 * writing into the low region, we save room for the secondary
447 * system to override our entries by using the high region.
449 ulp_ha_mgr_app_type_set(ulp_ctx, ULP_HA_APP_TYPE_PRIM);
450 ulp_ha_mgr_region_set(ulp_ctx, ULP_HA_REGION_LOW);
451 rc = ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_RUN);
453 BNXT_TF_DBG(ERR, "On Open: Failed to set PRIM_RUN.\n");
457 BNXT_TF_DBG(INFO, "On Open: [INIT] => PRIM[PRIM_RUN]\n");
459 case ULP_HA_STATE_PRIM_RUN:
461 * The secondary system is starting in order to take over.
462 * The current primary is expected to eventually close and pass
463 * full control to this system;however, until the primary closes
464 * both are operational.
466 ulp_ha_mgr_app_type_set(ulp_ctx, ULP_HA_APP_TYPE_SEC);
467 ulp_ha_mgr_region_set(ulp_ctx, ULP_HA_REGION_HI);
469 rc = ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_SEC_RUN);
471 BNXT_TF_DBG(ERR, "On Open: Failed to set PRIM_SEC_RUN\n");
474 BNXT_TF_DBG(INFO, "On Open: [PRIM_RUN] => [PRIM_SEC_RUN]\n");
477 BNXT_TF_DBG(ERR, "On Open: Unknown state 0x%x\n", curr_state);
/*
 * Handle a close according to the current HA state and the local app type.
 *
 * Visible cases:
 *  - PRIM_RUN as PRIM: write state INIT.
 *  - PRIM_SEC_RUN as PRIM: flush the regular flows, write state
 *    SEC_TIMER_COPY, then repeatedly delay ULP_HA_WAIT_TIME ms and re-read
 *    the state until it is PRIM_RUN or the timeout budget, which starts at
 *    ULP_HA_WAIT_TIMEOUT, is no longer positive.
 *  - PRIM_SEC_RUN as SEC: write state PRIM_RUN.
 *  - SEC_TIMER_COPY as SEC: run a similar delay and poll loop, then write
 *    state INIT.
 *  - A further path logs the state and type as invalid.
 *
 * ulp_ctx: ULP context to operate on.
 *
 * NOTE(review): the embedded numbering skips many lines in this function.
 * The declarations of rc and timeout, the checks on rc, the do keywords,
 * the second half of the last loop condition, the timeout checks and the
 * return statements are not visible, so the returned values cannot be
 * confirmed from this listing.
 * NOTE(review): the TODO fragment below asks for the wait to be bounded,
 * yet the visible loop already tests timeout; the TODO may be stale.
 * NOTE(review): the final log labels its values type/state but passes
 * curr_state first and app_type second; confirm the intended order.
 */
485 ulp_ha_mgr_close(struct bnxt_ulp_context *ulp_ctx)
487 enum ulp_ha_mgr_state curr_state, next_state, poll_state;
488 enum ulp_ha_mgr_app_type app_type;
/* Both values are given defaults before the getters are called. */
492 curr_state = ULP_HA_STATE_INIT;
493 app_type = ULP_HA_APP_TYPE_NONE;
494 rc = ulp_ha_mgr_state_get(ulp_ctx, &curr_state);
496 BNXT_TF_DBG(ERR, "On Close: Failed(%d) to get HA state\n", rc);
500 rc = ulp_ha_mgr_app_type_get(ulp_ctx, &app_type);
502 BNXT_TF_DBG(ERR, "On Close: Failed to get the app type.\n");
506 if (curr_state == ULP_HA_STATE_PRIM_RUN &&
507 app_type == ULP_HA_APP_TYPE_PRIM) {
509 * Only the primary is running, so a close effectively moves the
510 * system back to INIT.
512 next_state = ULP_HA_STATE_INIT;
513 ulp_ha_mgr_state_set(ulp_ctx, next_state);
514 BNXT_TF_DBG(INFO, "On Close: PRIM[PRIM_RUN] => [INIT]\n");
515 } else if (curr_state == ULP_HA_STATE_PRIM_SEC_RUN &&
516 app_type == ULP_HA_APP_TYPE_PRIM) {
518 * While both are running, the primary received a close.
519 * Cleanup the flows, set the COPY state, and wait for the
520 * secondary to become the Primary.
523 "On Close: PRIM[PRIM_SEC_RUN] flushing flows.\n");
525 ulp_flow_db_flush_flows(ulp_ctx, BNXT_ULP_FDB_TYPE_REGULAR);
526 ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_SEC_TIMER_COPY);
529 * TODO: This needs to be bounded in case the other system does
530 * not move to PRIM_RUN.
533 "On Close: PRIM[PRIM_SEC_RUN] => [Copy], enter wait.\n");
534 timeout = ULP_HA_WAIT_TIMEOUT;
536 rte_delay_ms(ULP_HA_WAIT_TIME);
537 rc = ulp_ha_mgr_state_get(ulp_ctx, &poll_state);
540 "Failed to get HA state on Close (%d)\n",
544 timeout -= ULP_HA_WAIT_TIME;
546 "On Close: Waiting %d ms for PRIM_RUN\n",
548 } while (poll_state != ULP_HA_STATE_PRIM_RUN && timeout > 0);
551 BNXT_TF_DBG(ERR, "On Close: SEC[COPY] Timed out\n");
555 BNXT_TF_DBG(INFO, "On Close: PRIM[PRIM_SEC_RUN] => [COPY]\n");
556 } else if (curr_state == ULP_HA_STATE_PRIM_SEC_RUN &&
557 app_type == ULP_HA_APP_TYPE_SEC) {
559 * While both are running, the secondary unexpectedly received a
562 ulp_ha_mgr_state_set(ulp_ctx, ULP_HA_STATE_PRIM_RUN);
564 BNXT_TF_DBG(INFO, "On Close: SEC[PRIM_SEC_RUN] => [PRIM_RUN]\n");
565 } else if (curr_state == ULP_HA_STATE_SEC_TIMER_COPY &&
566 app_type == ULP_HA_APP_TYPE_SEC) {
568 * While both were running and the Secondary went into copy,
569 * secondary received a close. Wait until the former Primary
570 * clears the copy stage, close, and set to INIT.
572 BNXT_TF_DBG(INFO, "On Close: SEC[COPY] wait for PRIM_RUN\n");
574 timeout = ULP_HA_WAIT_TIMEOUT;
576 rte_delay_ms(ULP_HA_WAIT_TIME);
577 rc = ulp_ha_mgr_state_get(ulp_ctx, &poll_state);
580 "Failed to get HA state on Close (%d)\n",
585 timeout -= ULP_HA_WAIT_TIME;
587 "On Close: Waiting %d ms for PRIM_RUN\n",
589 } while (poll_state != ULP_HA_STATE_PRIM_RUN &&
594 "On Close: SEC[COPY] Timed out\n");
598 next_state = ULP_HA_STATE_INIT;
599 rc = ulp_ha_mgr_state_set(ulp_ctx, next_state);
602 "On Close: Failed to set state to INIT(%x)\n",
608 "On Close: SEC[COPY] => [INIT] after %d ms\n",
609 ULP_HA_WAIT_TIMEOUT - timeout);
611 BNXT_TF_DBG(ERR, "On Close: Invalid type/state %d/%d\n",
612 curr_state, app_type);
/*
 * Report the region stored in the HA info of the ULP context.
 *
 * The HA info is fetched with bnxt_ulp_cntxt_ptr2_ha_info_get() and its
 * region field is copied to *region.
 *
 * ulp_ctx: ULP context; NULL is logged as an error.
 * region:  output pointer; NULL is logged as an error.
 *
 * NOTE(review): the embedded numbering skips lines (621, 623, 626-628,
 * 632-633) and the listing ends right after the store, so the return
 * statements and the end of the function are not visible here.
 */
619 ulp_ha_mgr_region_get(struct bnxt_ulp_context *ulp_ctx,
620 enum ulp_ha_mgr_region *region)
622 struct bnxt_ulp_ha_mgr_info *ha_info;
624 if (ulp_ctx == NULL || region == NULL) {
625 BNXT_TF_DBG(ERR, "Invalid params in ha region get.\n");
629 ha_info = bnxt_ulp_cntxt_ptr2_ha_info_get(ulp_ctx);
630 if (ha_info == NULL) {
631 BNXT_TF_DBG(ERR, "Unable to get ha info\n");
634 *region = ha_info->region;