Projects
openEuler:24.03:SP1:Everything:64G
gcc
_service:tar_scm:0036-rtl-ifcvt-introduce-rtl-i...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch of Package gcc
From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001 From: vchernon <chernonog.vyacheslav@huawei.com> Date: Wed, 28 Feb 2024 23:05:12 +0800 Subject: [PATCH 02/18] [rtl-ifcvt] introduce rtl ifcvt enchancements new option: -fifcvt-allow-complicated-cmps: allows ifcvt to deal with complicated cmps like cmp reg1 (reg2 + reg3) can increase compilation time new param: -param=ifcvt-allow-register-renaming=[0,1,2] 1 : allows ifcvt to rename registers in then and else bb 2 : allows to rename registers in condition and else/then bb can increase compilation time and register pressure --- gcc/common.opt | 4 + gcc/ifcvt.cc | 291 +++++++++++++++--- gcc/params.opt | 4 + .../gcc.c-torture/execute/ifcvt-renaming-1.c | 35 +++ gcc/testsuite/gcc.dg/ifcvt-6.c | 27 ++ 5 files changed, 311 insertions(+), 50 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c diff --git a/gcc/common.opt b/gcc/common.opt index c7c6bc256..aa00fb7b0 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -3691,4 +3691,8 @@ fipa-ra Common Var(flag_ipa_ra) Optimization Use caller save register across calls if possible. +fifcvt-allow-complicated-cmps +Common Var(flag_ifcvt_allow_complicated_cmps) Optimization +Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time). + ; This comment is to ensure we retain the blank line above. diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index 2c1eba312..584db7b55 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep, } /* Don't even try if the comparison operands or the mode of X are weird. 
*/ - if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x))) + if (!flag_ifcvt_allow_complicated_cmps + && (cond_complex + || !SCALAR_INT_MODE_P (GET_MODE (x)))) return NULL_RTX; return emit_store_flag (x, code, XEXP (cond, 0), @@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) /* Currently support only simple single sets in test_bb. */ if (!sset || !noce_operand_ok (SET_DEST (sset)) - || contains_ccmode_rtx_p (SET_DEST (sset)) + || (!flag_ifcvt_allow_complicated_cmps + && contains_ccmode_rtx_p (SET_DEST (sset))) || !noce_operand_ok (SET_SRC (sset))) return false; @@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) in this function. */ static bool -bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) +bbs_ok_for_cmove_arith (basic_block bb_a, + basic_block bb_b, + rtx to_rename, + bitmap conflict_regs) { rtx_insn *a_insn; bitmap bba_sets = BITMAP_ALLOC (&reg_obstack); - + bitmap intersections = BITMAP_ALLOC (&reg_obstack); df_ref def; df_ref use; + rtx_insn *last_a = last_active_insn (bb_a, FALSE); FOR_BB_INSNS (bb_a, a_insn) { @@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) rtx sset_a = single_set (a_insn); if (!sset_a) - { - BITMAP_FREE (bba_sets); - return false; - } + goto end_cmove_arith_check_and_fail; /* Record all registers that BB_A sets. 
*/ FOR_EACH_INSN_DEF (def, a_insn) - if (!(to_rename && DF_REF_REG (def) == to_rename)) + if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a)) bitmap_set_bit (bba_sets, DF_REF_REGNO (def)); } + bitmap_and (intersections, df_get_live_in (bb_b), bba_sets); rtx_insn *b_insn; - FOR_BB_INSNS (bb_b, b_insn) { if (!active_insn_p (b_insn)) @@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) rtx sset_b = single_set (b_insn); if (!sset_b) - { - BITMAP_FREE (bba_sets); - return false; - } + goto end_cmove_arith_check_and_fail; /* Make sure this is a REG and not some instance of ZERO_EXTRACT or SUBREG or other dangerous stuff. @@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) if (MEM_P (SET_DEST (sset_b))) gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename)); else if (!REG_P (SET_DEST (sset_b))) - { - BITMAP_FREE (bba_sets); - return false; - } + goto end_cmove_arith_check_and_fail; - /* If the insn uses a reg set in BB_A return false. */ + /* If the insn uses a reg set in BB_A return false + or try to collect register list for renaming. */ FOR_EACH_INSN_USE (use, b_insn) { - if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use))) + if (bitmap_bit_p (intersections, DF_REF_REGNO (use))) { - BITMAP_FREE (bba_sets); - return false; + if (param_ifcvt_allow_register_renaming < 1) + goto end_cmove_arith_check_and_fail; + + /* Those regs should be renamed. We can't rename CC reg, but + possibly we can provide combined comparison in the future. */ + if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC) + goto end_cmove_arith_check_and_fail; + bitmap_set_bit (conflict_regs, DF_REF_REGNO (use)); } } - } BITMAP_FREE (bba_sets); + BITMAP_FREE (intersections); return true; + +end_cmove_arith_check_and_fail: + BITMAP_FREE (bba_sets); + BITMAP_FREE (intersections); + return false; } /* Emit copies of all the active instructions in BB except the last. 
@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple) return true; } +/* This function tries to rename regs that intersect with considered bb + inside condition expression. Condition expression will be moved down + if the optimization will be applied, so it is essential to be sure that + all intersected registers will be renamed otherwise transformation + can't be applied. Function returns true if renaming was successful + and optimization can proceed further. */ + +static bool +noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) +{ + bool success = true; + if (bitmap_empty_p (cond_rename_regs)) + return true; + if (param_ifcvt_allow_register_renaming < 2) + return false; + df_ref use; + rtx_insn *cmp_insn = if_info->cond_earliest; + /* Jump instruction as a condition currently unsupported. */ + if (JUMP_P (cmp_insn)) + return false; + rtx_insn *before_cmp = PREV_INSN (cmp_insn); + start_sequence (); + rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn)); + basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn); + FOR_EACH_INSN_USE (use, cmp_insn) + { + if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use))) + { + rtx use_reg = DF_REF_REG (use); + rtx tmp = gen_reg_rtx (GET_MODE (use_reg)); + if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp)) + { + end_sequence (); + return false; + } + noce_emit_move_insn (tmp, use_reg); + } + } + + emit_insn (PATTERN (copy_of_cmp)); + rtx_insn *seq = get_insns (); + unshare_all_rtl_in_chain (seq); + end_sequence (); + + emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn)); + delete_insn_and_edges (cmp_insn); + rtx_insn *insn; + FOR_BB_INSNS (cmp_block, insn) + df_insn_rescan (insn); + + if_info->cond = noce_get_condition (if_info->jump, + &copy_of_cmp, + if_info->then_else_reversed); + if_info->cond_earliest = copy_of_cmp; + if_info->rev_cond = NULL_RTX; + + return success; +} + +/* This function tries to rename regs that intersect with considered bb. 
+ return true if the renaming was successful and optimization can + proceed further, false otherwise. */ +static bool +noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) +{ + if (bitmap_empty_p (rename_regs)) + return true; + rtx_insn *insn; + rtx_insn *last_insn = last_active_insn (test_bb, FALSE); + bool res = true; + start_sequence (); + FOR_BB_INSNS (test_bb, insn) + { + if (!active_insn_p (insn)) + continue; + /* Only ssets are supported for now. */ + rtx sset = single_set (insn); + gcc_assert (sset); + rtx x = SET_DEST (sset); + if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x))) + continue; + /* Do not need to rename dest in the last instruction + it will be renamed anyway. */ + if (insn == last_insn) + continue; + machine_mode mode = GET_MODE (x); + rtx tmp = gen_reg_rtx (mode); + if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn)) + { + gcc_assert (insn != last_insn); + /* We can generate additional move for such case, + but it will increase register pressure. + For now just stop transformation. 
*/ + rtx result_rtx = SET_DEST (single_set (last_insn)); + if (REG_P (result_rtx) && (x != result_rtx)) + { + res = false; + break; + } + if (!validate_replace_rtx (x, tmp, insn)) + gcc_unreachable (); + noce_emit_move_insn (tmp,x); + } + set_used_flags (insn); + rtx_insn *rename_candidate; + for (rename_candidate = NEXT_INSN (insn); + rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb)); + rename_candidate = NEXT_INSN (rename_candidate)) + { + if (!reg_overlap_mentioned_p (x, rename_candidate)) + continue; + + int replace_res = TRUE; + if (rename_candidate == last_insn) + { + validate_replace_src_group (x, tmp, rename_candidate); + replace_res = apply_change_group (); + } + else + replace_res = validate_replace_rtx (x, tmp, rename_candidate); + gcc_assert (replace_res); + set_used_flags (rename_candidate); + } + set_used_flags (x); + set_used_flags (tmp); + } + rtx_insn *seq = get_insns (); + unshare_all_rtl_in_chain (seq); + end_sequence (); + emit_insn_before_setloc (seq, first_active_insn (test_bb), + INSN_LOCATION (first_active_insn (test_bb))); + FOR_BB_INSNS (test_bb, insn) + df_insn_rescan (insn); + return res; +} + /* Try more complex cases involving conditional_move. 
*/ static int @@ -2185,11 +2331,30 @@ noce_try_cmove_arith (struct noce_if_info *if_info) std::swap (then_bb, else_bb); } } - + bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack); + bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack); if (then_bb && else_bb - && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x) - || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x))) - return FALSE; + && (!bbs_ok_for_cmove_arith (then_bb, else_bb, + if_info->orig_x, + then_bb_rename_regs) + || !bbs_ok_for_cmove_arith (else_bb, then_bb, + if_info->orig_x, + else_bb_rename_regs))) + { + BITMAP_FREE (then_bb_rename_regs); + BITMAP_FREE (else_bb_rename_regs); + return FALSE; + } + bool prepass_renaming = noce_rename_regs_in_bb (then_bb, + then_bb_rename_regs) + && noce_rename_regs_in_bb (else_bb, + else_bb_rename_regs); + + BITMAP_FREE (then_bb_rename_regs); + BITMAP_FREE (else_bb_rename_regs); + + if (!prepass_renaming) + return FALSE; start_sequence (); @@ -3072,7 +3237,8 @@ noce_operand_ok (const_rtx op) static bool bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, - unsigned int *cost, bool *simple_p) + unsigned int *cost, bool *simple_p, + bitmap cond_rename_regs) { if (!test_bb) return false; @@ -3112,8 +3278,9 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, rtx_insn *prev_last_insn = PREV_INSN (last_insn); gcc_assert (prev_last_insn); - /* For now, disallow setting x multiple times in test_bb. 
*/ - if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn)) + if (REG_P (x) + && reg_set_between_p (x, first_insn, prev_last_insn) + && param_ifcvt_allow_register_renaming < 1) return false; bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack); @@ -3125,25 +3292,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, rtx_insn *insn; FOR_BB_INSNS (test_bb, insn) { - if (insn != last_insn) - { - if (!active_insn_p (insn)) - continue; + if (insn == last_insn) + continue; + if (!active_insn_p (insn)) + continue; - if (!insn_valid_noce_process_p (insn, cc)) - goto free_bitmap_and_fail; + if (!insn_valid_noce_process_p (insn, cc)) + goto free_bitmap_and_fail; - rtx sset = single_set (insn); - gcc_assert (sset); + rtx sset = single_set (insn); + gcc_assert (sset); - if (contains_mem_rtx_p (SET_SRC (sset)) - || !REG_P (SET_DEST (sset)) - || reg_overlap_mentioned_p (SET_DEST (sset), cond)) - goto free_bitmap_and_fail; + if (contains_mem_rtx_p (SET_SRC (sset)) + || !REG_P (SET_DEST (sset))) + goto free_bitmap_and_fail; - potential_cost += pattern_cost (sset, speed_p); - bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); + if (reg_overlap_mentioned_p (SET_DEST (sset), cond)) + { + if (param_ifcvt_allow_register_renaming < 1) + goto free_bitmap_and_fail; + rtx sset_dest = SET_DEST (sset); + if (REG_P (sset_dest) + && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC)) + bitmap_set_bit (cond_rename_regs, REGNO (sset_dest)); + else + goto free_bitmap_and_fail; } + potential_cost += pattern_cost (sset, speed_p); + if (SET_DEST (sset) != SET_DEST (last_set)) + bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); } /* If any of the intermediate results in test_bb are live after test_bb @@ -3777,15 +3954,29 @@ noce_process_if_block (struct noce_if_info *if_info) bool speed_p = optimize_bb_for_speed_p (test_bb); unsigned int then_cost = 0, else_cost = 0; + bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack); if (!bb_valid_for_noce_process_p (then_bb, cond, 
&then_cost, - &if_info->then_simple)) - return false; + &if_info->then_simple, cond_rename_regs)) + { + BITMAP_FREE (cond_rename_regs); + return false; + } if (else_bb && !bb_valid_for_noce_process_p (else_bb, cond, &else_cost, - &if_info->else_simple)) - return false; + &if_info->else_simple, cond_rename_regs)) + { + BITMAP_FREE (cond_rename_regs); + return false; + } + if (!noce_rename_regs_in_cond (if_info, cond_rename_regs)) + { + BITMAP_FREE (cond_rename_regs); + return false; + } + BITMAP_FREE (cond_rename_regs); + cond = if_info->cond; if (speed_p) if_info->original_cost += average_cost (then_cost, else_cost, find_edge (test_bb, then_bb)); @@ -5823,12 +6014,13 @@ if_convert (bool after_combine) { basic_block bb; int pass; - if (optimize == 1) { df_live_add_problem (); df_live_set_all_dirty (); } + free_dominance_info (CDI_DOMINATORS); + cleanup_cfg (CLEANUP_EXPENSIVE); /* Record whether we are after combine pass. */ ifcvt_after_combine = after_combine; @@ -5933,7 +6125,6 @@ rest_of_handle_if_conversion (void) dump_reg_info (dump_file); dump_flow_info (dump_file, dump_flags); } - cleanup_cfg (CLEANUP_EXPENSIVE); if_convert (false); if (num_updated_if_blocks) /* Get rid of any dead CC-related instructions. */ diff --git a/gcc/params.opt b/gcc/params.opt index d2196dc68..ba87f820b 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -669,6 +669,10 @@ Maximum permissible cost for the sequence that would be generated by the RTL if- Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable. +-param=ifcvt-allow-register-renaming= +Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization +Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created. 
+ -param=max-sched-extend-regions-iters= Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization The maximum number of iterations through CFG to extend regions. diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c new file mode 100644 index 000000000..65c4d4140 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c @@ -0,0 +1,35 @@ + +extern void abort(void); + +__attribute__ ((noinline)) +int foo (int x, int y, int z, int a, int b) +{ + if (a < 2) { + if (a == 0) { + if (x - y < 0) + x = x - y + z; + else + x = x - y; + } + else { + if (x + y >= z) + x = x + y - z; + else + x = x + y; + } + } + return x; +} + +int main(void) { + if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2 + abort (); + if (foo (50,10,7,0,1) != 40) // x - y = 40 + abort (); + if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8 + abort (); + if (foo (5,10,70,1,1) != 15) // x + y = 15 + abort (); + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c new file mode 100644 index 000000000..be9a67b3f --- /dev/null +++ b/gcc/testsuite/gcc.dg/ifcvt-6.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 -fifcvt-allow-complicated-cmps" } */ + +typedef unsigned int uint16_t; + +uint16_t +foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a, + uint16_t b, uint16_t c, uint16_t d) { + int i = 1; + int j = 1; + if (a > b) { + j = x; + if (b > c) + i = y; + else + i = z; + } + else { + j = y; + if (c > d) + i = z; + } + return i * j; +} + +/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */ + -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2