Projects
Mega:24.03
gcc
_service:tar_scm:LoongArch-Enable-shrink-wrappi...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:LoongArch-Enable-shrink-wrapping.patch of Package gcc
From e86c9ece7ae922fe80017ba2ffe22f6267531682 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 23 Apr 2023 20:52:22 +0800
Subject: [PATCH 045/124] LoongArch: Enable shrink wrapping

This commit implements the target macros for shrink wrapping of function
prologues/epilogues on LoongArch.

Bootstrapped and regtested on loongarch64-linux-gnu.  I don't have access
to SPEC CPU, so I hope the reviewer can perform a benchmark to see if there
is real benefit.

gcc/ChangeLog:

	* config/loongarch/loongarch.h (struct machine_function): Add
	reg_is_wrapped_separately array for register wrapping
	information.
	* config/loongarch/loongarch.cc
	(loongarch_get_separate_components): New function.
	(loongarch_components_for_bb): Likewise.
	(loongarch_disqualify_components): Likewise.
	(loongarch_process_components): Likewise.
	(loongarch_emit_prologue_components): Likewise.
	(loongarch_emit_epilogue_components): Likewise.
	(loongarch_set_handled_components): Likewise.
	(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
	(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
	(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
	(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
	(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
	(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
	(loongarch_for_each_saved_reg): Skip registers that are wrapped
	separately.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/shrink-wrap.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
 gcc/config/loongarch/loongarch.cc             | 179 +++++++++++++++++-
 gcc/config/loongarch/loongarch.h              |   2 +
 .../gcc.target/loongarch/shrink-wrap.c        |  19 ++
 3 files changed, 197 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/shrink-wrap.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index d3c6f22ad..4c0f393b6 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "rtl-iter.h"
 #include "opts.h"
+#include "function-abi.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -1014,19 +1015,23 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
       {
-	loongarch_save_restore_reg (word_mode, regno, offset, fn);
+	if (!cfun->machine->reg_is_wrapped_separately[regno])
+	  loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
 	offset -= UNITS_PER_WORD;
       }
 
   /* This loop must iterate over the same space as its companion in
      loongarch_compute_frame_info.  */
   offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
   for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
     if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
       {
-	machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+	if (!cfun->machine->reg_is_wrapped_separately[regno])
+	  loongarch_save_restore_reg (mode, regno, offset, fn);
 
-	loongarch_save_restore_reg (mode, regno, offset, fn);
 	offset -= GET_MODE_SIZE (mode);
       }
 }
@@ -6630,6 +6635,151 @@ loongarch_asan_shadow_offset (void)
   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
 }
 
+static sbitmap
+loongarch_get_separate_components (void)
+{
+  HOST_WIDE_INT offset;
+  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+  bitmap_clear (components);
+  offset = cfun->machine->frame.gp_sp_offset;
+
+  /* The stack should be aligned to a 16-byte boundary, so we can make use
+     of ldptr instructions.  */
+  gcc_assert (offset % UNITS_PER_WORD == 0);
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+	/* We can wrap general registers saved at [sp, sp + 32768) using the
+	   ldptr/stptr instructions.  For large offsets a pseudo register
+	   might be needed which cannot be created during the shrink
+	   wrapping pass.
+
+	   TODO: This may need revising when we add LA32, as ldptr.w is not
+	   guaranteed to be available by the manual.  */
+	if (offset < 32768)
+	  bitmap_set_bit (components, regno);
+
+	offset -= UNITS_PER_WORD;
+      }
+
+  offset = cfun->machine->frame.fp_sp_offset;
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+	/* We can only wrap FP registers with imm12 offsets.  For large
+	   offsets a pseudo register might be needed which cannot be
+	   created during the shrink wrapping pass.  */
+	if (IMM12_OPERAND (offset))
+	  bitmap_set_bit (components, regno);
+
+	offset -= UNITS_PER_FPREG;
+      }
+
+  /* Don't mess with the hard frame pointer.  */
+  if (frame_pointer_needed)
+    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+  bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
+
+  return components;
+}
+
+static sbitmap
+loongarch_components_for_bb (basic_block bb)
+{
+  /* Registers are used in a bb if they are in the IN, GEN, or KILL sets.  */
+  auto_bitmap used;
+  bitmap_copy (used, DF_LIVE_IN (bb));
+  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
+  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
+
+  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+  bitmap_clear (components);
+
+  function_abi_aggregator callee_abis;
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    if (CALL_P (insn))
+      callee_abis.note_callee_abi (insn_callee_abi (insn));
+
+  HARD_REG_SET extra_caller_saves =
+    callee_abis.caller_save_regs (*crtl->abi);
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (!fixed_regs[regno]
+	&& !crtl->abi->clobbers_full_reg_p (regno)
+	&& (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+	    bitmap_bit_p (used, regno)))
+      bitmap_set_bit (components, regno);
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (!fixed_regs[regno]
+	&& !crtl->abi->clobbers_full_reg_p (regno)
+	&& (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+	    bitmap_bit_p (used, regno)))
+      bitmap_set_bit (components, regno);
+
+  return components;
+}
+
+static void
+loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+  /* Do nothing.  */
+}
+
+static void
+loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+{
+  HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+	if (bitmap_bit_p (components, regno))
+	  loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
+	offset -= UNITS_PER_WORD;
+      }
+
+  offset = cfun->machine->frame.fp_sp_offset;
+  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+	if (bitmap_bit_p (components, regno))
+	  loongarch_save_restore_reg (mode, regno, offset, fn);
+
+	offset -= UNITS_PER_FPREG;
+      }
+}
+
+static void
+loongarch_emit_prologue_components (sbitmap components)
+{
+  loongarch_process_components (components, loongarch_save_reg);
+}
+
+static void
+loongarch_emit_epilogue_components (sbitmap components)
+{
+  loongarch_process_components (components, loongarch_restore_reg);
+}
+
+static void
+loongarch_set_handled_components (sbitmap components)
+{
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (bitmap_bit_p (components, regno))
+      cfun->machine->reg_is_wrapped_separately[regno] = true;
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (bitmap_bit_p (components, regno))
+      cfun->machine->reg_is_wrapped_separately[regno] = true;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -6827,6 +6977,29 @@ loongarch_asan_shadow_offset (void)
 #undef TARGET_ASAN_SHADOW_OFFSET
 #define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
 
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
+  loongarch_get_separate_components
+
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
+
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
+  loongarch_disqualify_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+  loongarch_emit_prologue_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+  loongarch_emit_epilogue_components
+
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
+  loongarch_set_handled_components
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index af24bfa01..44ebadfaa 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1147,6 +1147,8 @@ struct GTY (()) machine_function
   /* The current frame information, calculated by loongarch_compute_frame_info.
    */
   struct loongarch_frame_info frame;
+
+  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
 };
 
 #endif
diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
new file mode 100644
index 000000000..1431536c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fshrink-wrap" } */
+
+/* We should not save anything before checking the value of x.  */
+/* { dg-final { scan-assembler-not "st(ptr)?\\\.\[dw\].*b(eq|ne)z" } } */
+
+int
+foo (int x)
+{
+  __asm__ ("nop" :);
+  if (x)
+    {
+      __asm__ ("" ::: "s0", "s1");
+      return x;
+    }
+
+  __asm__ ("" ::: "s2", "s3");
+  return 0;
+}
-- 
2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2