Projects
openEuler:24.03:SP1:Everything
gcc
_service:tar_scm:0213-Backport-SME-aarch64-Hand...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch of Package gcc
From ef9c800309fa326ca56dd9d9affd7d5498624bb8 Mon Sep 17 00:00:00 2001 From: Richard Sandiford <richard.sandiford@arm.com> Date: Tue, 5 Dec 2023 10:11:29 +0000 Subject: [PATCH 114/157] [Backport][SME] aarch64: Handle PSTATE.SM across abnormal edges Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=275706fc59b5fdcc26d46d9f19951fc86b40c515 PSTATE.SM is always off on entry to an exception handler, and on entry to a nonlocal goto receiver. Those entry points need to switch PSTATE.SM back to the appropriate state for the current function. In the case of streaming-compatible functions, they need to restore the mode that the caller was originally using. The requirement on nonlocal goto receivers means that nonlocal jumps need to ensure that PSTATE.SM is zero. gcc/ * config/aarch64/aarch64.cc: Include except.h (aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function. (aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise. (aarch64_need_old_pstate_sm): Return true if the function has a nonlocal-goto or exception receiver. (aarch64_switch_pstate_sm_for_landing_pad): New function. (aarch64_switch_pstate_sm_for_jump): Likewise. (pass_switch_pstate_sm::gate): Enable the pass for all streaming and streaming-compatible functions. (pass_switch_pstate_sm::execute): Handle non-local gotos and their receivers. Handle exception handler entry points. gcc/testsuite/ * g++.target/aarch64/sme/exceptions_2.C: New test. * gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise. * gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise. --- gcc/config/aarch64/aarch64.cc | 141 ++++++++++++++++- .../g++.target/aarch64/sme/exceptions_2.C | 148 ++++++++++++++++++ .../gcc.target/aarch64/sme/nonlocal_goto_1.c | 58 +++++++ .../gcc.target/aarch64/sme/nonlocal_goto_2.c | 44 ++++++ .../gcc.target/aarch64/sme/nonlocal_goto_3.c | 46 ++++++ .../gcc.target/aarch64/sme/nonlocal_goto_4.c | 25 +++ .../gcc.target/aarch64/sme/nonlocal_goto_5.c | 26 +++ .../gcc.target/aarch64/sme/nonlocal_goto_6.c | 31 ++++ .../gcc.target/aarch64/sme/nonlocal_goto_7.c | 25 +++ 9 files changed, 537 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4cb43c2e2..effb567c2 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -82,6 +82,7 @@ #include "tree-dfa.h" #include "asan.h" #include "aarch64-feature-deps.h" +#include "except.h" #include "tree-pass.h" #include "cfgbuild.h" @@ -7295,6 +7296,8 @@ public: void add_reg (machine_mode, unsigned int); void add_call_args (rtx_call_insn *); void add_call_result (rtx_call_insn *); + void add_call_preserved_reg (unsigned int); + void add_call_preserved_regs (bitmap); void emit_prologue (); void emit_epilogue (); @@ -7427,6 +7430,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) add_reg (GET_MODE (dest), REGNO (dest)); } +/* REGNO is a register that is call-preserved under the current function's ABI. + Record that it must be preserved around the mode switch. */ + +void +aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno) +{ + if (FP_REGNUM_P (regno)) + switch (crtl->abi->id ()) + { + case ARM_PCS_SVE: + add_reg (VNx16QImode, regno); + break; + case ARM_PCS_SIMD: + add_reg (V16QImode, regno); + break; + case ARM_PCS_AAPCS64: + add_reg (DImode, regno); + break; + default: + gcc_unreachable (); + } + else if (PR_REGNUM_P (regno)) + add_reg (VNx16BImode, regno); +} + +/* The hard registers in REGS are call-preserved under the current function's + ABI. Record that they must be preserved around the mode switch. */ + +void +aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs) +{ + bitmap_iterator bi; + unsigned int regno; + EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi) + if (HARD_REGISTER_NUM_P (regno)) + add_call_preserved_reg (regno); + else + break; +} + /* Emit code to save registers before the mode switch. */ void @@ -9825,6 +9868,23 @@ aarch64_need_old_pstate_sm () if (aarch64_cfun_enables_pstate_sm ()) return true; + /* Non-local goto receivers are entered with PSTATE.SM equal to 0, + but the function needs to return with PSTATE.SM unchanged. */ + if (nonlocal_goto_handler_labels) + return true; + + /* Likewise for exception handlers. */ + eh_landing_pad lp; + for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i) + if (lp && lp->post_landing_pad) + return true; + + /* Non-local gotos need to set PSTATE.SM to zero. It's possible to call + streaming-compatible functions without SME being available, so PSTATE.SM + should only be changed if it is currently set to one. */ + if (crtl->has_nonlocal_goto) + return true; + if (cfun->machine->call_switches_pstate_sm) for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) if (auto *call = dyn_cast<rtx_call_insn *> (insn)) @@ -30209,6 +30269,59 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, return seq; } +/* BB is the target of an exception or nonlocal goto edge, which means + that PSTATE.SM is known to be 0 on entry. Put it into the state that + the current function requires. */ + +static bool +aarch64_switch_pstate_sm_for_landing_pad (basic_block bb) +{ + if (TARGET_NON_STREAMING) + return false; + + start_sequence (); + rtx_insn *guard_label = nullptr; + if (TARGET_STREAMING_COMPATIBLE) + guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, + AARCH64_FL_SM_OFF); + aarch64_sme_mode_switch_regs args_switch; + args_switch.add_call_preserved_regs (df_get_live_in (bb)); + args_switch.emit_prologue (); + aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON); + args_switch.emit_epilogue (); + if (guard_label) + emit_label (guard_label); + auto seq = get_insns (); + end_sequence (); + + emit_insn_after (seq, bb_note (bb)); + return true; +} + +/* JUMP is a nonlocal goto. Its target requires PSTATE.SM to be 0 on entry, + so arrange to make it so. */ + +static bool +aarch64_switch_pstate_sm_for_jump (rtx_insn *jump) +{ + if (TARGET_NON_STREAMING) + return false; + + start_sequence (); + rtx_insn *guard_label = nullptr; + if (TARGET_STREAMING_COMPATIBLE) + guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, + AARCH64_FL_SM_OFF); + aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF); + if (guard_label) + emit_label (guard_label); + auto seq = get_insns (); + end_sequence (); + + emit_insn_before (seq, jump); + return true; +} + /* If CALL involves a change in PSTATE.SM, emit the instructions needed to switch to the new mode and the instructions needed to restore the original mode. Return true if something changed. */ @@ -30292,9 +30405,10 @@ public: }; bool -pass_switch_pstate_sm::gate (function *) +pass_switch_pstate_sm::gate (function *fn) { - return cfun->machine->call_switches_pstate_sm; + return (aarch64_fndecl_pstate_sm (fn->decl) != AARCH64_FL_SM_OFF + || cfun->machine->call_switches_pstate_sm); } /* Emit any instructions needed to switch PSTATE.SM. */ @@ -30307,11 +30421,24 @@ pass_switch_pstate_sm::execute (function *fn) bitmap_clear (blocks); FOR_EACH_BB_FN (bb, fn) { - rtx_insn *insn; - FOR_BB_INSNS (bb, insn) - if (auto *call = dyn_cast<rtx_call_insn *> (insn)) - if (aarch64_switch_pstate_sm_for_call (call)) - bitmap_set_bit (blocks, bb->index); + if (has_abnormal_call_or_eh_pred_edge_p (bb) + && aarch64_switch_pstate_sm_for_landing_pad (bb)) + bitmap_set_bit (blocks, bb->index); + + if (cfun->machine->call_switches_pstate_sm) + { + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (auto *call = dyn_cast<rtx_call_insn *> (insn)) + if (aarch64_switch_pstate_sm_for_call (call)) + bitmap_set_bit (blocks, bb->index); + } + + auto end = BB_END (bb); + if (JUMP_P (end) + && find_reg_note (end, REG_NON_LOCAL_GOTO, NULL_RTX) + && aarch64_switch_pstate_sm_for_jump (end)) + bitmap_set_bit (blocks, bb->index); } find_many_sub_basic_blocks (blocks); clear_aux_for_blocks (); diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C new file mode 100644 index 000000000..f791b6ecc --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C @@ -0,0 +1,148 @@ +// { dg-options "-O -fno-optimize-sibling-calls" } +// { dg-final { check-function-bodies "**" "" } } + +void n_callee(); +void s_callee() __arm_streaming; +void sc_callee() __arm_streaming_compatible; + +void n_callee_ne() noexcept; +void s_callee_ne() noexcept __arm_streaming; +void sc_callee_ne() noexcept __arm_streaming_compatible; + +void n_caller1() +{ + try + { + n_callee(); + sc_callee(); + } + catch (...) + { + n_callee_ne(); + sc_callee_ne(); + } +} +// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } } + +/* +** _Z9n_caller2v: +** ... +** cntd (x[0-9]+) +** str \1, [^\n]+ +** ... +** bl __cxa_begin_catch +** smstart sm +** bl _Z11s_callee_nev +** smstop sm +** bl __cxa_end_catch +** ... +*/ +void n_caller2() +{ + try + { + n_callee(); + sc_callee(); + } + catch (...) + { + s_callee_ne(); + } +} + +/* +** _Z9s_caller1v: +** ... +** bl __cxa_end_catch +** smstart sm +** ... +*/ +int s_caller1() __arm_streaming +{ + try + { + s_callee(); + return 1; + } + catch (...) + { + return 2; + } +} + +/* +** _Z9s_caller2v: +** ... +** bl __cxa_begin_catch +** smstart sm +** bl _Z11s_callee_nev +** smstop sm +** bl __cxa_end_catch +** smstart sm +** ... +*/ +int s_caller2() __arm_streaming +{ + try + { + n_callee(); + return 1; + } + catch (...) + { + s_callee_ne(); + return 2; + } +} + +/* +** _Z10sc_caller1v: +** ... +** cntd (x[0-9]+) +** str \1, [^\n]+ +** mrs (x[0-9]+), svcr +** str \2, ([^\n]+) +** ... +** bl __cxa_end_catch +** ldr (x[0-9]+), \3 +** tbz \4, 0, [^\n]+ +** smstart sm +** ... +*/ +int sc_caller1() __arm_streaming_compatible +{ + try + { + sc_callee(); + return 1; + } + catch (...) + { + return 2; + } +} + +/* +** _Z10ls_caller1v: +** ... +** cntd (x[0-9]+) +** str \1, [^\n]+ +** ... +** bl __cxa_begin_catch +** smstart sm +** bl _Z12sc_callee_nev +** smstop sm +** bl __cxa_end_catch +** ... +*/ +__arm_locally_streaming void ls_caller1() +{ + try + { + sc_callee(); + } + catch (...) + { + sc_callee_ne(); + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c new file mode 100644 index 000000000..4e3869fcc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c @@ -0,0 +1,58 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void run(void (*)()); + +/* +** foo: +** ... +** mrs x16, svcr +** ... +** str x16, (.*) +** ... +** ldr x16, \1 +** tbz x16, 0, .* +** smstop sm +** bl __clear_cache +** ldr x16, \1 +** tbz x16, 0, .* +** smstart sm +** add x0, .* +** ldr x16, \1 +** tbz x16, 0, .* +** smstop sm +** bl run +** ldr x16, \1 +** tbz x16, 0, .* +** smstart sm +** mov w0, 1 +** ... +** ret +** ldr x16, \1 +** tbz x16, 0, .* +** smstart sm +** mov w0, 0 +** ... +*/ +int +foo (int *ptr) __arm_streaming_compatible +{ + __label__ failure; + + void bar () { *ptr += 1; goto failure; } + run (bar); + return 1; + +failure: + return 0; +} + +// { dg-final { scan-assembler {\tstp\tx19, x20,} } } +// { dg-final { scan-assembler {\tstp\tx21, x22,} } } +// { dg-final { scan-assembler {\tstp\tx23, x24,} } } +// { dg-final { scan-assembler {\tstp\tx25, x26,} } } +// { dg-final { scan-assembler {\tstp\tx27, x28,} } } +// { dg-final { scan-assembler {\tstp\td8, d9,} } } +// { dg-final { scan-assembler {\tstp\td10, d11,} } } +// { dg-final { scan-assembler {\tstp\td12, d13,} } } +// { dg-final { scan-assembler {\tstp\td14, d15,} } } diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c new file mode 100644 index 000000000..2a2db72c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c @@ -0,0 +1,44 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void run(void (*)()); + +/* +** foo: +** ... +** smstop sm +** bl __clear_cache +** smstart sm +** add x0, .* +** smstop sm +** bl run +** smstart sm +** mov w0, 1 +** ... +** ret +** smstart sm +** mov w0, 0 +** ... +*/ +int +foo (int *ptr) __arm_streaming +{ + __label__ failure; + + void bar () { *ptr += 1; goto failure; } + run (bar); + return 1; + +failure: + return 0; +} + +// { dg-final { scan-assembler {\tstp\tx19, x20,} } } +// { dg-final { scan-assembler {\tstp\tx21, x22,} } } +// { dg-final { scan-assembler {\tstp\tx23, x24,} } } +// { dg-final { scan-assembler {\tstp\tx25, x26,} } } +// { dg-final { scan-assembler {\tstp\tx27, x28,} } } +// { dg-final { scan-assembler {\tstp\td8, d9,} } } +// { dg-final { scan-assembler {\tstp\td10, d11,} } } +// { dg-final { scan-assembler {\tstp\td12, d13,} } } +// { dg-final { scan-assembler {\tstp\td14, d15,} } } diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c new file mode 100644 index 000000000..022b04052 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c @@ -0,0 +1,46 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void run(void (*)()); + +/* +** foo: +** ... +** smstart sm +** ... +** smstop sm +** bl __clear_cache +** smstart sm +** add x0, .* +** smstop sm +** bl run +** smstart sm +** mov w0, 1 +** ... +** smstart sm +** mov w0, 0 +** smstop sm +** ... +*/ +__arm_locally_streaming int +foo (int *ptr) +{ + __label__ failure; + + void bar () { *ptr += 1; goto failure; } + run (bar); + return 1; + +failure: + return 0; +} + +// { dg-final { scan-assembler {\tstp\tx19, x20,} } } +// { dg-final { scan-assembler {\tstp\tx21, x22,} } } +// { dg-final { scan-assembler {\tstp\tx23, x24,} } } +// { dg-final { scan-assembler {\tstp\tx25, x26,} } } +// { dg-final { scan-assembler {\tstp\tx27, x28,} } } +// { dg-final { scan-assembler {\tstp\td8, d9,} } } +// { dg-final { scan-assembler {\tstp\td10, d11,} } } +// { dg-final { scan-assembler {\tstp\td12, d13,} } } +// { dg-final { scan-assembler {\tstp\td14, d15,} } } diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c new file mode 100644 index 000000000..044607628 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c @@ -0,0 +1,25 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void run(void (*)()); + +/* +** bar.0: +** ... +** smstart sm +** ... +** smstop sm +** br x[0-9]+ +*/ +int +foo (int *ptr) +{ + __label__ failure; + + __arm_locally_streaming void bar () { *ptr += 1; goto failure; } + run (bar); + return 1; + +failure: + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c new file mode 100644 index 000000000..4246aec8b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c @@ -0,0 +1,26 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void run(void (*)() __arm_streaming); + +/* +** bar.0: +** ... +** smstop sm +** br x[0-9]+ +*/ +int +foo (int *ptr) +{ + __label__ failure; + + void bar () __arm_streaming { *ptr += 1; goto failure; } + run (bar); + return 1; + +failure: + return 0; +} + +// { dg-final { scan-assembler-not {smstart\t} } } +// { dg-final { scan-assembler-not {mrs\t} } } diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c new file mode 100644 index 000000000..151e2f22d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c @@ -0,0 +1,31 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void run(void (*)() __arm_streaming_compatible); + +/* +** bar.0: +** ... +** mrs x16, svcr +** ... +** str x16, (.*) +** ... +** ldr x16, \1 +** tbz x16, 0, .* +** smstop sm +** br x[0-9]+ +*/ +int +foo (int *ptr) +{ + __label__ failure; + + void bar () __arm_streaming_compatible { *ptr += 1; goto failure; } + run (bar); + return 1; + +failure: + return 0; +} + +// { dg-final { scan-assembler-not {smstart\t} } } diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c new file mode 100644 index 000000000..9cc3ad5d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c @@ -0,0 +1,25 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ + +void run(void (*)() __arm_inout("za")); +void callee () __arm_inout("za"); + +int +foo (int *ptr) +{ + __label__ failure; + + void bar () __arm_inout("za") + { + callee (); + *ptr += 1; + goto failure; + } + run (bar); + return 1; + +failure: + return 0; +} + +// { dg-final { scan-assembler-not {\tsmstart\t} } } +// { dg-final { scan-assembler-not {\tsmstop\t} } } -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2