Tests showed that the function inlining caused performance drop
on some x86 platforms with the memory ordering patches applied.
By force no-inline functions, the performance was better than
before on x86 and no impact to arm64 platforms.
Besides inlines of other functions are removed to let compiler
to decide whether to inline.
Suggested-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
/*
* Converts given depth value to its corresponding range value.
*/
/*
* Converts given depth value to its corresponding range value.
*/
-static inline uint32_t __attribute__((pure))
+static uint32_t __attribute__((pure))
depth_to_range(uint8_t depth)
{
VERIFY_DEPTH(depth);
depth_to_range(uint8_t depth)
{
VERIFY_DEPTH(depth);
* are stored in the rule table from 0 - 31.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
* are stored in the rule table from 0 - 31.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
uint8_t next_hop)
{
rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
uint8_t next_hop)
{
rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
uint32_t next_hop)
{
rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
uint32_t next_hop)
{
* Delete a rule from the rule table.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
* Delete a rule from the rule table.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
{
int i;
rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
{
int i;
lpm->rule_info[depth - 1].used_rules--;
}
lpm->rule_info[depth - 1].used_rules--;
}
rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
{
int i;
rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
{
int i;
* Finds a rule in rule table.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
* Finds a rule in rule table.
* NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
*/
rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
{
uint32_t rule_gindex, last_rule, rule_index;
rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
{
uint32_t rule_gindex, last_rule, rule_index;
rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
{
uint32_t rule_gindex, last_rule, rule_index;
rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
{
uint32_t rule_gindex, last_rule, rule_index;
/*
* Find, clean and allocate a tbl8.
*/
/*
* Find, clean and allocate a tbl8.
*/
tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
{
uint32_t group_idx; /* tbl8 group index. */
tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
{
uint32_t group_idx; /* tbl8 group index. */
tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
{
uint32_t group_idx; /* tbl8 group index. */
tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
{
uint32_t group_idx; /* tbl8 group index. */
tbl8_free_v20(struct rte_lpm_tbl_entry_v20 *tbl8, uint32_t tbl8_group_start)
{
/* Set tbl8 group invalid*/
tbl8[tbl8_group_start].valid_group = INVALID;
}
tbl8_free_v20(struct rte_lpm_tbl_entry_v20 *tbl8, uint32_t tbl8_group_start)
{
/* Set tbl8 group invalid*/
tbl8[tbl8_group_start].valid_group = INVALID;
}
tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
{
/* Set tbl8 group invalid*/
tbl8[tbl8_group_start].valid_group = INVALID;
}
tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
{
/* Set tbl8 group invalid*/
tbl8[tbl8_group_start].valid_group = INVALID;
}
+static __rte_noinline int32_t
add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t next_hop)
{
add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t next_hop)
{
+static __rte_noinline int32_t
add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint32_t next_hop)
{
add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint32_t next_hop)
{
+static __rte_noinline int32_t
add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
uint8_t next_hop)
{
add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
uint8_t next_hop)
{
+static __rte_noinline int32_t
add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
uint32_t next_hop)
{
add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
uint32_t next_hop)
{
MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604);
MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604);
find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t *sub_rule_depth)
{
find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t *sub_rule_depth)
{
find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint8_t *sub_rule_depth)
{
find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint8_t *sub_rule_depth)
{
delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
* Return of value > -1 means tbl8 is in use but has all the same values and
* thus can be recycled
*/
* Return of value > -1 means tbl8 is in use but has all the same values and
* thus can be recycled
*/
tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
uint32_t tbl8_group_start)
{
tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
uint32_t tbl8_group_start)
{
tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
uint32_t tbl8_group_start)
{
tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
uint32_t tbl8_group_start)
{
delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{
delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
{