Projects
openEuler:24.03:SP1:Everything
gcc
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 5
View file
_service:tar_scm:gcc.spec
Changed
@@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 31 +%global gcc_release 32 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -206,6 +206,27 @@ Patch94: 0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch Patch95: 0095-STABS-remove-gstabs-and-gxcoff-functionality.patch Patch96: 0096-Bugfix-Autofdo-use-PMU-sampling-set-num-eauals-den.patch +Patch97: 0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch +Patch98: 0098-aarch64-Avoid-a-use-of-callee-offset.patch +Patch99: 0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch +Patch100: 0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch +Patch101: 0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch +Patch102: 0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch +Patch103: 0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch +Patch104: 0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch +Patch105: 0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch +Patch106: 0106-aarch64-Tweak-frame-size-comment.patch +Patch107: 0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch +Patch108: 0108-aarch64-Simplify-top-of-frame-allocation.patch +Patch109: 0109-aarch64-Minor-initial-adjustment-tweak.patch +Patch110: 0110-aarch64-Tweak-stack-clash-boundary-condition.patch +Patch111: 0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch +Patch112: 0112-aarch64-Simplify-probe-of-final-frame-allocation.patch +Patch113: 0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch +Patch114: 0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch +Patch115: 0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch +Patch116: 0116-aarch64-Fix-return-register-handling-in-untyped_call.patch +Patch117: 0117-aarch64-Fix-loose-ldpstp-check.patch # Part 3000 ~ 4999 %ifarch loongarch64 @@ -873,6 +894,27 @@ %patch94 -p1 %patch95 -p1 %patch96 -p1 +%patch97 -p1 +%patch98 -p1 +%patch99 -p1 +%patch100 -p1 +%patch101 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 +%patch106 -p1 +%patch107 -p1 +%patch108 -p1 +%patch109 -p1 +%patch110 -p1 +%patch111 -p1 +%patch112 -p1 +%patch113 -p1 +%patch114 -p1 +%patch115 -p1 +%patch116 -p1 +%patch117 -p1 %ifarch loongarch64 %patch3001 -p1 @@ -3268,6 +3310,12 @@ %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Sep 20 2024 fuanan <fuanan3@h-partners.com> - 12.3.1-32 +- Type:Sync +- ID:NA +- SUG:NA +- DESC:Sync patch for CVE-2023-4039 + * Thu Jul 11 2024 huyubiao <huyubiao@huawei.com> - 12.3.1-31 - Type:SPEC - ID:NA
View file
_service:tar_scm:0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
Added
@@ -0,0 +1,378 @@ +From 62fbb215cc817e9f2c1ca80282a64f4ee30806bc Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:48 +0100 +Subject: PATCH aarch64: Use local frame vars in shrink-wrapping code + +aarch64_layout_frame uses a shorthand for referring to +cfun->machine->frame: + + aarch64_frame &frame = cfun->machine->frame; + +This patch does the same for some other heavy users of the structure. +No functional change intended. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use + a local shorthand for cfun->machine->frame. + (aarch64_restore_callee_saves, aarch64_get_separate_components): + (aarch64_process_components): Likewise. + (aarch64_allocate_and_probe_stack_space): Likewise. + (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. + (aarch64_layout_frame): Use existing shorthand for one more case. +--- + gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++---------------- + 1 file changed, 64 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 226dc9dffd47..ae42ffdedbeb 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8351,7 +8351,7 @@ aarch64_layout_frame (void) + frame.is_scs_enabled + = (!crtl->calls_eh_return + && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) +- && known_ge (cfun->machine->frame.reg_offsetLR_REGNUM, 0)); ++ && known_ge (frame.reg_offsetLR_REGNUM, 0)); + + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, and we don't need to pop x30 again in the traditional +@@ -8763,6 +8763,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + rtx_insn *insn; + unsigned regno; + unsigned regno2; +@@ -8777,8 +8778,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + + if (skip_wb +- && (regno == cfun->machine->frame.wb_push_candidate1 +- || regno == cfun->machine->frame.wb_push_candidate2)) ++ && (regno == frame.wb_push_candidate1 ++ || regno == frame.wb_push_candidate2)) + continue; + + if (cfun->machine->reg_is_wrapped_separatelyregno) +@@ -8786,7 +8787,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + cfun->machine->frame.reg_offsetregno; ++ offset = start_offset + frame.reg_offsetregno; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -8799,7 +8800,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + { + gcc_assert (known_eq (start_offset, 0)); + poly_int64 fp_offset +- = cfun->machine->frame.below_hard_fp_saved_regs_size; ++ = frame.below_hard_fp_saved_regs_size; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -8821,8 +8822,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separatelyregno2 + && known_eq (GET_MODE_SIZE (mode), +- cfun->machine->frame.reg_offsetregno2 +- - cfun->machine->frame.reg_offsetregno)) ++ frame.reg_offsetregno2 - frame.reg_offsetregno)) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; +@@ -8872,6 +8872,7 @@ static void + aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { ++ aarch64_frame &frame = cfun->machine->frame; + unsigned regno; + unsigned regno2; + poly_int64 offset; +@@ -8888,13 +8889,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + rtx reg, mem; + + if (skip_wb +- && (regno == cfun->machine->frame.wb_pop_candidate1 +- || regno == cfun->machine->frame.wb_pop_candidate2)) ++ && (regno == frame.wb_pop_candidate1 ++ || regno == frame.wb_pop_candidate2)) + continue; + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + cfun->machine->frame.reg_offsetregno; ++ offset = start_offset + frame.reg_offsetregno; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -8905,8 +8906,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separatelyregno2 + && known_eq (GET_MODE_SIZE (mode), +- cfun->machine->frame.reg_offsetregno2 +- - cfun->machine->frame.reg_offsetregno)) ++ frame.reg_offsetregno2 - frame.reg_offsetregno)) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; +@@ -9011,6 +9011,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) + static sbitmap + aarch64_get_separate_components (void) + { ++ aarch64_frame &frame = cfun->machine->frame; + sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); + bitmap_clear (components); + +@@ -9027,18 +9028,18 @@ aarch64_get_separate_components (void) + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + continue; + +- poly_int64 offset = cfun->machine->frame.reg_offsetregno; ++ poly_int64 offset = frame.reg_offsetregno; + + /* If the register is saved in the first SVE save slot, we use + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection +- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) ++ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) + && known_eq (offset, 0)) + continue; + + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) +- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset -= frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + +@@ -9057,11 +9058,11 @@ aarch64_get_separate_components (void) + /* If the spare predicate register used by big-endian SVE code + is call-preserved, it must be saved in the main prologue + before any saves that use it. */ +- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) +- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); ++ if (frame.spare_pred_reg != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.spare_pred_reg); + +- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; ++ unsigned reg1 = frame.wb_push_candidate1; ++ unsigned reg2 = frame.wb_push_candidate2; + /* If registers have been chosen to be stored/restored with + writeback don't interfere with them to avoid having to output explicit + stack adjustment instructions. */ +@@ -9170,6 +9171,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) + static void + aarch64_process_components (sbitmap components, bool prologue_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM); +@@ -9184,9 +9186,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) + machine_mode mode = aarch64_reg_save_mode (regno); + + rtx reg = gen_rtx_REG (mode, regno); +- poly_int64 offset = cfun->machine->frame.reg_offsetregno; ++ poly_int64 offset = frame.reg_offsetregno; + if (frame_pointer_needed) +- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset -= frame.below_hard_fp_saved_regs_size; + else + offset += crtl->outgoing_args_size; + +@@ -9211,14 +9213,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) + break; + } + +- poly_int64 offset2 = cfun->machine->frame.reg_offsetregno2; ++ poly_int64 offset2 = frame.reg_offsetregno2; + /* The next register is not of the same class or its offset is not + mergeable with the current one into a pair. */ + if (aarch64_sve_mode_p (mode) + || !satisfies_constraint_Ump (mem) + || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) + || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) +- || maybe_ne ((offset2 - cfun->machine->frame.reg_offsetregno), ++ || maybe_ne ((offset2 - frame.reg_offsetregno), + GET_MODE_SIZE (mode))) + { + insn = emit_insn (set); +@@ -9240,7 +9242,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + /* REGNO2 can be saved/restored in a pair with REGNO. */ + rtx reg2 = gen_rtx_REG (mode, regno2); + if (frame_pointer_needed) +- offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ offset2 -= frame.below_hard_fp_saved_regs_size; + else + offset2 += crtl->outgoing_args_size; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); +@@ -9335,6 +9337,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + bool frame_related_p, + bool final_adjustment_p) + { ++ aarch64_frame &frame = cfun->machine->frame; + HOST_WIDE_INT guard_size + = 1 << param_stack_clash_protection_guard_size; + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; +@@ -9355,25 +9358,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + register as a probe. We can't assume that LR was saved at position 0 + though, so treat any space below it as unprobed. */ + if (final_adjustment_p +- && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)) ++ && known_eq (frame.below_hard_fp_saved_regs_size, 0)) + { +- poly_int64 lr_offset = cfun->machine->frame.reg_offsetLR_REGNUM; ++ poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; + if (known_ge (lr_offset, 0)) + min_probe_threshold -= lr_offset.to_constant (); + else + gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); + } + +- poly_int64 frame_size = cfun->machine->frame.frame_size; ++ poly_int64 frame_size = frame.frame_size; + + /* We should always have a positive probe threshold. */ + gcc_assert (min_probe_threshold > 0); + + if (flag_stack_clash_protection && !final_adjustment_p) + { +- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; +- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; +- poly_int64 final_adjust = cfun->machine->frame.final_adjust; ++ poly_int64 initial_adjust = frame.initial_adjust; ++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust; ++ poly_int64 final_adjust = frame.final_adjust; + + if (known_eq (frame_size, 0)) + { +@@ -9662,17 +9665,18 @@ aarch64_epilogue_uses (int regno) + void + aarch64_expand_prologue (void) + { +- poly_int64 frame_size = cfun->machine->frame.frame_size; +- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; +- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +- poly_int64 final_adjust = cfun->machine->frame.final_adjust; +- poly_int64 callee_offset = cfun->machine->frame.callee_offset; +- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; ++ aarch64_frame &frame = cfun->machine->frame; ++ poly_int64 frame_size = frame.frame_size; ++ poly_int64 initial_adjust = frame.initial_adjust; ++ HOST_WIDE_INT callee_adjust = frame.callee_adjust; ++ poly_int64 final_adjust = frame.final_adjust; ++ poly_int64 callee_offset = frame.callee_offset; ++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size +- = cfun->machine->frame.below_hard_fp_saved_regs_size; +- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; +- bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; ++ = frame.below_hard_fp_saved_regs_size; ++ unsigned reg1 = frame.wb_push_candidate1; ++ unsigned reg2 = frame.wb_push_candidate2; ++ bool emit_frame_chain = frame.emit_frame_chain; + rtx_insn *insn; + + if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) +@@ -9703,7 +9707,7 @@ aarch64_expand_prologue (void) + } + + /* Push return address to shadow call stack. */ +- if (cfun->machine->frame.is_scs_enabled) ++ if (frame.is_scs_enabled) + emit_insn (gen_scs_push ()); + + if (flag_stack_usage_info) +@@ -9742,7 +9746,7 @@ aarch64_expand_prologue (void) + + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust +- - cfun->machine->frame.hard_fp_offset); ++ - frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + + /* The offset of the bottom of the save area from the current SP. */ +@@ -9845,16 +9849,17 @@ aarch64_use_return_insn_p (void) + void + aarch64_expand_epilogue (bool for_sibcall) + { +- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; +- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +- poly_int64 final_adjust = cfun->machine->frame.final_adjust; +- poly_int64 callee_offset = cfun->machine->frame.callee_offset; +- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; ++ aarch64_frame &frame = cfun->machine->frame; ++ poly_int64 initial_adjust = frame.initial_adjust; ++ HOST_WIDE_INT callee_adjust = frame.callee_adjust; ++ poly_int64 final_adjust = frame.final_adjust; ++ poly_int64 callee_offset = frame.callee_offset; ++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size +- = cfun->machine->frame.below_hard_fp_saved_regs_size; +- unsigned reg1 = cfun->machine->frame.wb_pop_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_pop_candidate2; +- unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled ++ = frame.below_hard_fp_saved_regs_size; ++ unsigned reg1 = frame.wb_pop_candidate1; ++ unsigned reg2 = frame.wb_pop_candidate2; ++ unsigned int last_gpr = (frame.is_scs_enabled + ? R29_REGNUM : R30_REGNUM); + rtx cfi_ops = NULL; + rtx_insn *insn; +@@ -9888,7 +9893,7 @@ aarch64_expand_epilogue (bool for_sibcall) + /* We need to add memory barrier to prevent read from deallocated stack. */ + bool need_barrier_p + = maybe_ne (get_frame_size () +- + cfun->machine->frame.saved_varargs_size, 0); ++ + frame.saved_varargs_size, 0); + + /* Emit a barrier to prevent loads from a deallocated stack. */ + if (maybe_gt (final_adjust, crtl->outgoing_args_size) +@@ -9969,7 +9974,7 @@ aarch64_expand_epilogue (bool for_sibcall) + } + + /* Pop return address from shadow call stack. */ +- if (cfun->machine->frame.is_scs_enabled) ++ if (frame.is_scs_enabled) + { + machine_mode mode = aarch64_reg_save_mode (R30_REGNUM); + rtx reg = gen_rtx_REG (mode, R30_REGNUM); +@@ -12564,24 +12569,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) + poly_int64 + aarch64_initial_elimination_offset (unsigned from, unsigned to) + { ++ aarch64_frame &frame = cfun->machine->frame; ++ + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) +- return cfun->machine->frame.hard_fp_offset; ++ return frame.hard_fp_offset; + + if (from == FRAME_POINTER_REGNUM) +- return cfun->machine->frame.hard_fp_offset +- - cfun->machine->frame.locals_offset; ++ return frame.hard_fp_offset - frame.locals_offset; + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) +- return cfun->machine->frame.frame_size +- - cfun->machine->frame.locals_offset; ++ return frame.frame_size - frame.locals_offset; + } + +- return cfun->machine->frame.frame_size; ++ return frame.frame_size; + } + + +-- +2.43.5 +
View file
_service:tar_scm:0098-aarch64-Avoid-a-use-of-callee-offset.patch
Added
@@ -0,0 +1,73 @@ +From 12a8889de169f892d2e927584c00d20b8b7e456f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:49 +0100 +Subject: PATCH aarch64: Avoid a use of callee_offset + +When we emit the frame chain, i.e. when we reach Here in this statement +of aarch64_expand_prologue: + + if (emit_frame_chain) + { + // Here + ... + } + +the stack is in one of two states: + +- We've allocated up to the frame chain, but no more. + +- We've allocated the whole frame, and the frame chain is within easy + reach of the new SP. + +The offset of the frame chain from the current SP is available +in aarch64_frame as callee_offset. It is also available as the +chain_offset local variable, where the latter is calculated from other +data. (However, chain_offset is not always equal to callee_offset when +!emit_frame_chain, so chain_offset isn't redundant.) + +In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using +chain_offset for the initialisation of the hard frame pointer: + + aarch64_add_offset (Pmode, hard_frame_pointer_rtx, +- stack_pointer_rtx, callee_offset, ++ stack_pointer_rtx, chain_offset, + tmp1_rtx, tmp0_rtx, frame_pointer_needed); + +But the later REG_CFA_ADJUST_CFA handling still used callee_offset. + +I think the difference is harmless, but it's more logical for the +CFA note to be in sync, and it's more convenient for later patches +if it uses chain_offset. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use + chain_offset rather than callee_offset. +--- + gcc/config/aarch64/aarch64.cc | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index ae42ffdedbeb..79253322fd7c 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9670,7 +9670,6 @@ aarch64_expand_prologue (void) + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size + = frame.below_hard_fp_saved_regs_size; +@@ -9783,8 +9782,7 @@ aarch64_expand_prologue (void) + implicit. */ + if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) + { +- rtx src = plus_constant (Pmode, stack_pointer_rtx, +- callee_offset); ++ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset); + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (hard_frame_pointer_rtx, src)); + } +-- +2.43.5 +
View file
_service:tar_scm:0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
Added
@@ -0,0 +1,48 @@ +From 03d5e89e7f3be53fd7142556e8e0a2774c653dca Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:49 +0100 +Subject: PATCH aarch64: Explicitly handle frames with no saved registers + +If a frame has no saved registers, it can be allocated in one go. +There is no need to treat the areas below and above the saved +registers as separate. + +And if we allocate the frame in one go, it should be allocated +as the initial_adjust rather than the final_adjust. This allows the +frame size to grow to guard_size - guard_used_by_caller before a stack +probe is needed. (A frame with no register saves is necessarily a +leaf frame.) + +This is a no-op as thing stand, since a leaf function will have +no outgoing arguments, and so all the frame will be above where +the saved registers normally go. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly + allocate the frame in one go if there are no saved registers. +--- + gcc/config/aarch64/aarch64.cc | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 79253322fd7c..e1f21230c15e 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8378,9 +8378,11 @@ aarch64_layout_frame (void) + + HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; +- if (frame.frame_size.is_constant (&const_size) +- && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ if (known_eq (frame.saved_regs_size, 0)) ++ frame.initial_adjust = frame.frame_size; ++ else if (frame.frame_size.is_constant (&const_size) ++ && const_size < max_push_offset ++ && known_eq (frame.hard_fp_offset, const_size)) + { + /* Simple, small frame with no outgoing arguments: + +-- +2.43.5 +
View file
_service:tar_scm:0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
Added
@@ -0,0 +1,233 @@ +From 49c2eb7616756c323b7f6b18d8616ec945eb1263 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:49 +0100 +Subject: PATCH aarch64: Add bytes_below_saved_regs to frame info + +The frame layout code currently hard-codes the assumption that +the number of bytes below the saved registers is equal to the +size of the outgoing arguments. This patch abstracts that +value into a new field of aarch64_frame. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, + and use it instead of crtl->outgoing_args_size. + (aarch64_get_separate_components): Use bytes_below_saved_regs instead + of outgoing_args_size. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- + gcc/config/aarch64/aarch64.h | 5 +++ + 2 files changed, 41 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e1f21230c15e..94e1b6865849 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8217,6 +8217,8 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + ++ frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small + offset range. These saves happen below the hard frame pointer. */ +@@ -8321,18 +8323,18 @@ aarch64_layout_frame (void) + + poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; + +- poly_int64 above_outgoing_args ++ poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + + frame.hard_fp_offset +- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; ++ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +- gcc_assert (multiple_p (crtl->outgoing_args_size, ++ gcc_assert (multiple_p (frame.bytes_below_saved_regs, + STACK_BOUNDARY / BITS_PER_UNIT)); +- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; ++ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + + frame.locals_offset = frame.saved_varargs_size; + +@@ -8376,7 +8378,7 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; +@@ -8384,31 +8386,31 @@ aarch64_layout_frame (void) + && const_size < max_push_offset + && known_eq (frame.hard_fp_offset, const_size)) + { +- /* Simple, small frame with no outgoing arguments: ++ /* Simple, small frame with no data below the saved registers. + + stp reg1, reg2, sp, -frame_size! + stp reg3, reg4, sp, 16 */ + frame.callee_adjust = const_size; + } +- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) ++ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) + && frame.saved_regs_size.is_constant (&const_saved_regs_size) +- && const_outgoing_args_size + const_saved_regs_size < 512 +- /* We could handle this case even with outgoing args, provided +- that the number of args left us with valid offsets for all +- predicate and vector save slots. It's such a rare case that +- it hardly seems worth the effort though. */ +- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) ++ && const_below_saved_regs + const_saved_regs_size < 512 ++ /* We could handle this case even with data below the saved ++ registers, provided that that data left us with valid offsets ++ for all predicate and vector save slots. It's such a rare ++ case that it hardly seems worth the effort though. */ ++ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca + && frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset)) + { +- /* Frame with small outgoing arguments: ++ /* Frame with small area below the saved registers: + + sub sp, sp, frame_size +- stp reg1, reg2, sp, outgoing_args_size +- stp reg3, reg4, sp, outgoing_args_size + 16 */ ++ stp reg1, reg2, sp, bytes_below_saved_regs ++ stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_outgoing_args_size; ++ frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -8418,30 +8420,29 @@ aarch64_layout_frame (void) + + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = (frame.hard_fp_offset + + frame.below_hard_fp_saved_regs_size); +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else if (frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset) + { +- /* Frame with large outgoing arguments or SVE saves, but with +- a small local area: ++ /* Frame with large area below the saved registers, or with SVE saves, ++ but with a small area above: + + stp reg1, reg2, sp, -hard_fp_offset! + stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.callee_adjust = const_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else + { +- /* Frame with large local area and outgoing arguments or SVE saves, +- using frame pointer: ++ /* General case: + + sub sp, sp, hard_fp_offset + stp x29, x30, sp, 0 +@@ -8449,10 +8450,10 @@ aarch64_layout_frame (void) + stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = frame.hard_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + + /* Make sure the individual adjustments add up to the full frame size. */ +@@ -9043,7 +9044,7 @@ aarch64_get_separate_components (void) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9192,7 +9193,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9246,7 +9247,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset2 -= frame.below_hard_fp_saved_regs_size; + else +- offset2 += crtl->outgoing_args_size; ++ offset2 += frame.bytes_below_saved_regs; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9320,10 +9321,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + registers. If POLY_SIZE is not large enough to require a probe this function + will only adjust the stack. When allocating the stack space + FRAME_RELATED_P is then used to indicate if the allocation is frame related. +- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing +- arguments. If we are then we ensure that any allocation larger than the ABI +- defined buffer needs a probe so that the invariant of having a 1KB buffer is +- maintained. ++ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below ++ the saved registers. If we are then we ensure that any allocation ++ larger than the ABI defined buffer needs a probe so that the ++ invariant of having a 1KB buffer is maintained. + + We emit barriers after each stack adjustment to prevent optimizations from + breaking the invariant that we never drop the stack more than a page. This +@@ -9532,7 +9533,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to + be probed. This maintains the requirement that each page is probed at + least once. For initial probing we probe only if the allocation is +- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe ++ more than GUARD_SIZE - buffer, and below the saved registers we probe + if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == + GUARD_SIZE. This works that for any allocation that is large enough to + trigger a probe here, we'll have at least one, and if they're not large +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 6834c3e99226..1e105e12db8d 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -871,6 +871,11 @@ struct GTY (()) aarch64_frame + /* The size of the callee-save registers with a slot in REG_OFFSET. */ + poly_int64 saved_regs_size; + ++ /* The number of bytes between the bottom of the static frame (the bottom ++ of the outgoing arguments) and the bottom of the register save area. ++ This value is always a multiple of STACK_BOUNDARY. */ ++ poly_int64 bytes_below_saved_regs; ++ + /* The size of the callee-save registers with a slot in REG_OFFSET that + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; +-- +2.43.5 +
View file
_service:tar_scm:0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
Added
@@ -0,0 +1,84 @@ +From 34081079ea4de0c98331843f574b5f6f94d7b234 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:50 +0100 +Subject: PATCH aarch64: Add bytes_below_hard_fp to frame info + +Following on from the previous bytes_below_saved_regs patch, this one +records the number of bytes that are below the hard frame pointer. +This eventually replaces below_hard_fp_saved_regs_size. + +If a frame pointer is not needed, the epilogue adds final_adjust +to the stack pointer before restoring registers: + + aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); + +Therefore, if the epilogue needs to restore the stack pointer from +the hard frame pointer, the directly corresponding offset is: + + -bytes_below_hard_fp + final_adjust + +i.e. go from the hard frame pointer to the bottom of the frame, +then add the same amount as if we were using the stack pointer +from the outset. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. + (aarch64_expand_epilogue): Use it instead of + below_hard_fp_saved_regs_size. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 5 +++++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 94e1b6865849..c7d84245fbfc 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8269,6 +8269,7 @@ aarch64_layout_frame (void) + of the callee save area. */ + bool saves_below_hard_fp_p = maybe_ne (offset, 0); + frame.below_hard_fp_saved_regs_size = offset; ++ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -9856,8 +9857,7 @@ aarch64_expand_epilogue (bool for_sibcall) + poly_int64 final_adjust = frame.final_adjust; + poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; + unsigned reg2 = frame.wb_pop_candidate2; + unsigned int last_gpr = (frame.is_scs_enabled +@@ -9915,7 +9915,7 @@ aarch64_expand_epilogue (bool for_sibcall) + is restored on the instruction doing the writeback. */ + aarch64_add_offset (Pmode, stack_pointer_rtx, + hard_frame_pointer_rtx, +- -callee_offset - below_hard_fp_saved_regs_size, ++ -bytes_below_hard_fp + final_adjust, + tmp1_rtx, tmp0_rtx, callee_adjust == 0); + else + /* The case where we need to re-use the register here is very rare, so +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 1e105e12db8d..de68ff7202fc 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -880,6 +880,11 @@ struct GTY (()) aarch64_frame + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; + ++ /* The number of bytes between the bottom of the static frame (the bottom ++ of the outgoing arguments) and the hard frame pointer. This value is ++ always a multiple of STACK_BOUNDARY. */ ++ poly_int64 bytes_below_hard_fp; ++ + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. This value is always a multiple of + STACK_BOUNDARY. */ +-- +2.43.5 +
View file
_service:tar_scm:0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
Added
@@ -0,0 +1,225 @@ +From 187861af7c51db9eddc6f954b589c121b210fc74 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:50 +0100 +Subject: PATCH aarch64: Tweak aarch64_save/restore_callee_saves + +aarch64_save_callee_saves and aarch64_restore_callee_saves took +a parameter called start_offset that gives the offset of the +bottom of the saved register area from the current stack pointer. +However, it's more convenient for later patches if we use the +bottom of the entire frame as the reference point, rather than +the bottom of the saved registers. + +Doing that removes the need for the callee_offset field. +Other than that, this is not a win on its own. It only really +makes sense in combination with the follow-on patches. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove + callee_offset handling. + (aarch64_save_callee_saves): Replace the start_offset parameter + with a bytes_below_sp parameter. + (aarch64_restore_callee_saves): Likewise. + (aarch64_expand_prologue): Update accordingly. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ + gcc/config/aarch64/aarch64.h | 4 --- + 2 files changed, 28 insertions(+), 32 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index c7d84245fbfc..e79551af41df 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8343,7 +8343,6 @@ aarch64_layout_frame (void) + frame.final_adjust = 0; + frame.callee_adjust = 0; + frame.sve_callee_adjust = 0; +- frame.callee_offset = 0; + + frame.wb_pop_candidate1 = frame.wb_push_candidate1; + frame.wb_pop_candidate2 = frame.wb_push_candidate2; +@@ -8411,7 +8410,6 @@ aarch64_layout_frame (void) + stp reg1, reg2, sp, bytes_below_saved_regs + stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -8758,12 +8756,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, + } + + /* Emit code to save the callee-saved registers from register number START +- to LIMIT to the stack at the location starting at offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P +- is true if the hard frame pointer has been set up. */ ++ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard ++ frame pointer has been set up. */ + + static void +-aarch64_save_callee_saves (poly_int64 start_offset, ++aarch64_save_callee_saves (poly_int64 bytes_below_sp, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { +@@ -8791,7 +8790,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -8802,9 +8803,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + else if (GP_REGNUM_P (regno) + && (!offset.is_constant (&const_offset) || const_offset >= 512)) + { +- gcc_assert (known_eq (start_offset, 0)); +- poly_int64 fp_offset +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -8868,12 +8867,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, + } + + /* Emit code to restore the callee registers from register number START +- up to and including LIMIT. Restore from the stack offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. Write the +- appropriate REG_CFA_RESTORE notes into CFI_OPS. */ ++ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE ++ notes into CFI_OPS. */ + + static void +-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, ++aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { + aarch64_frame &frame = cfun->machine->frame; +@@ -8899,7 +8899,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9675,8 +9677,6 @@ aarch64_expand_prologue (void) + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; + unsigned reg1 = frame.wb_push_candidate1; + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; +@@ -9752,8 +9752,8 @@ aarch64_expand_prologue (void) + - frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + +- /* The offset of the bottom of the save area from the current SP. */ +- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; ++ /* The offset of the current SP from the bottom of the static frame. */ ++ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + + if (emit_frame_chain) + { +@@ -9761,7 +9761,7 @@ aarch64_expand_prologue (void) + { + reg1 = R29_REGNUM; + reg2 = R30_REGNUM; +- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, ++ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, + false, false); + } + else +@@ -9801,7 +9801,7 @@ aarch64_expand_prologue (void) + emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + } + +- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + if (maybe_ne (sve_callee_adjust, 0)) +@@ -9811,16 +9811,17 @@ aarch64_expand_prologue (void) + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, + sve_callee_adjust, + !frame_pointer_needed, false); +- saved_regs_offset += sve_callee_adjust; ++ bytes_below_sp -= sve_callee_adjust; + } +- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, + false, emit_frame_chain); +- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + + /* We may need to probe the final adjustment if it is larger than the guard + that is assumed by the called. */ ++ gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); + } +@@ -9855,7 +9856,6 @@ aarch64_expand_epilogue (bool for_sibcall) + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; +@@ -9925,9 +9925,9 @@ aarch64_expand_epilogue (bool for_sibcall) + + /* Restore the vector registers before the predicate registers, + so that we can use P4 as a temporary for big-endian SVE frames. */ +- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, + callee_adjust != 0, &cfi_ops); +- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, + false, &cfi_ops); + if (maybe_ne (sve_callee_adjust, 0)) + aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); +@@ -9935,7 +9935,7 @@ aarch64_expand_epilogue (bool for_sibcall) + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, we don't need to restore x30 again in the traditional + way. */ +- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, ++ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust, + R0_REGNUM, last_gpr, + callee_adjust != 0, &cfi_ops); + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index de68ff7202fc..94fca4b94716 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -907,10 +907,6 @@ struct GTY (()) aarch64_frame + It is zero when no push is used. */ + HOST_WIDE_INT callee_adjust; + +- /* The offset from SP to the callee-save registers after initial_adjust. +- It may be non-zero if no push is used (ie. callee_adjust == 0). */ +- poly_int64 callee_offset; +- + /* The size of the stack adjustment before saving or after restoring + SVE registers. */ + poly_int64 sve_callee_adjust; +-- +2.43.5 +
View file
_service:tar_scm:0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
Added
@@ -0,0 +1,44 @@ +From 2b983f9064d808daf909bde1d4a13980934a7e6e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:51 +0100 +Subject: PATCH aarch64: Only calculate chain_offset if there is a chain + +After previous patches, it is no longer necessary to calculate +a chain_offset in cases where there is no chain record. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the + calculation of chain_offset into the emit_frame_chain block. +--- + gcc/config/aarch64/aarch64.cc | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e79551af41df..d71a042d6112 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9747,16 +9747,16 @@ aarch64_expand_prologue (void) + if (callee_adjust != 0) + aarch64_push_regs (reg1, reg2, callee_adjust); + +- /* The offset of the frame chain record (if any) from the current SP. */ +- poly_int64 chain_offset = (initial_adjust + callee_adjust +- - frame.hard_fp_offset); +- gcc_assert (known_ge (chain_offset, 0)); +- + /* The offset of the current SP from the bottom of the static frame. */ + poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + + if (emit_frame_chain) + { ++ /* The offset of the frame chain record (if any) from the current SP. */ ++ poly_int64 chain_offset = (initial_adjust + callee_adjust ++ - frame.hard_fp_offset); ++ gcc_assert (known_ge (chain_offset, 0)); ++ + if (callee_adjust == 0) + { + reg1 = R29_REGNUM; +-- +2.43.5 +
View file
_service:tar_scm:0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
Added
@@ -0,0 +1,91 @@ +From 0a0a824808d1dec51004fb5805c1a0ae2a35433f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:51 +0100 +Subject: PATCH aarch64: Rename locals_offset to bytes_above_locals +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf8 +Content-Transfer-Encoding: 8bit + +locals_offset was described as: + + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. This value is always a multiple of + STACK_BOUNDARY. */ + +This is implicitly an âupside downâ view of the frame: the incoming +SP is at offset 0, and anything N bytes below the incoming SP is at +offset N (rather than -N). + +However, reg_offset instead uses a âright way upâ view; that is, +it views offsets in address terms. Something above X is at a +positive offset from X and something below X is at a negative +offset from X. + +Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, +target-independent code views offsets in address terms too: +locals are allocated at negative offsets to virtual_stack_vars. + +It seems confusing to have *_offset fields of the same structure +using different polarities like this. This patch tries to avoid +that by renaming locals_offset to bytes_above_locals. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... + (aarch64_frame::bytes_above_locals): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_initial_elimination_offset): Update accordingly. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index d71a042d6112..d4ec352ba98a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8337,7 +8337,7 @@ aarch64_layout_frame (void) + STACK_BOUNDARY / BITS_PER_UNIT)); + frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + +- frame.locals_offset = frame.saved_varargs_size; ++ frame.bytes_above_locals = frame.saved_varargs_size; + + frame.initial_adjust = 0; + frame.final_adjust = 0; +@@ -12578,13 +12578,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + return frame.hard_fp_offset; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.locals_offset; ++ return frame.hard_fp_offset - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) +- return frame.frame_size - frame.locals_offset; ++ return frame.frame_size - frame.bytes_above_locals; + } + + return frame.frame_size; +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 94fca4b94716..bf46e6124aa9 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -885,10 +885,10 @@ struct GTY (()) aarch64_frame + always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_hard_fp; + +- /* Offset from the base of the frame (incomming SP) to the +- top of the locals area. This value is always a multiple of ++ /* The number of bytes between the top of the locals area and the top ++ of the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 locals_offset; ++ poly_int64 bytes_above_locals; + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. This value is always a multiple of +-- +2.43.5 +
View file
_service:tar_scm:0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
Added
@@ -0,0 +1,148 @@ +From 3fbf0789202b30a67b12e1fb785c7130f098d665 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:52 +0100 +Subject: PATCH aarch64: Rename hard_fp_offset to bytes_above_hard_fp +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf8 +Content-Transfer-Encoding: 8bit + +Similarly to the previous locals_offset patch, hard_fp_offset +was described as: + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. This value is always a multiple of + STACK_BOUNDARY. */ + poly_int64 hard_fp_offset; + +which again took an âupside-downâ view: higher offsets meant lower +addresses. This patch renames the field to bytes_above_hard_fp instead. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename + to... + (aarch64_frame::bytes_above_hard_fp): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_expand_prologue): Update accordingly. + (aarch64_initial_elimination_offset): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index d4ec352ba98a..3c4052740e7a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8329,7 +8329,7 @@ aarch64_layout_frame (void) + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.hard_fp_offset ++ frame.bytes_above_hard_fp + = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +@@ -8378,13 +8378,13 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; + else if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ && known_eq (frame.bytes_above_hard_fp, const_size)) + { + /* Simple, small frame with no data below the saved registers. + +@@ -8401,8 +8401,8 @@ aarch64_layout_frame (void) + case that it hardly seems worth the effort though. */ + && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca +- && frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset)) ++ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset)) + { + /* Frame with small area below the saved registers: + +@@ -8420,12 +8420,12 @@ aarch64_layout_frame (void) + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = (frame.hard_fp_offset ++ frame.initial_adjust = (frame.bytes_above_hard_fp + + frame.below_hard_fp_saved_regs_size); + frame.final_adjust = frame.bytes_below_saved_regs; + } +- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset) ++ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset) + { + /* Frame with large area below the saved registers, or with SVE saves, + but with a small area above: +@@ -8435,7 +8435,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.callee_adjust = const_fp_offset; ++ frame.callee_adjust = const_above_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -8450,7 +8450,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = frame.hard_fp_offset; ++ frame.initial_adjust = frame.bytes_above_hard_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -9754,7 +9754,7 @@ aarch64_expand_prologue (void) + { + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust +- - frame.hard_fp_offset); ++ - frame.bytes_above_hard_fp); + gcc_assert (known_ge (chain_offset, 0)); + + if (callee_adjust == 0) +@@ -12575,10 +12575,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) +- return frame.hard_fp_offset; ++ return frame.bytes_above_hard_fp; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.bytes_above_locals; ++ return frame.bytes_above_hard_fp - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index bf46e6124aa9..dd1f403f9393 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -890,10 +890,10 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + poly_int64 bytes_above_locals; + +- /* Offset from the base of the frame (incomming SP) to the +- hard_frame_pointer. This value is always a multiple of ++ /* The number of bytes between the hard_frame_pointer and the top of ++ the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 hard_fp_offset; ++ poly_int64 bytes_above_hard_fp; + + /* The size of the frame. This value is the offset from base of the + frame (incomming SP) to the stack_pointer. This value is always +-- +2.43.5 +
View file
_service:tar_scm:0106-aarch64-Tweak-frame-size-comment.patch
Added
@@ -0,0 +1,35 @@ +From aac8b31379ac3bbd14fc6427dce23f56e54e8485 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:52 +0100 +Subject: PATCH aarch64: Tweak frame_size comment +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf8 +Content-Transfer-Encoding: 8bit + +This patch fixes another case in which a value was described with +an âupside-downâ view. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. +--- + gcc/config/aarch64/aarch64.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index dd1f403f9393..700524ae22bf 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -895,8 +895,8 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + poly_int64 bytes_above_hard_fp; + +- /* The size of the frame. This value is the offset from base of the +- frame (incomming SP) to the stack_pointer. This value is always ++ /* The size of the frame, i.e. the number of bytes between the bottom ++ of the outgoing arguments and the incoming SP. This value is always + a multiple of STACK_BOUNDARY. */ + poly_int64 frame_size; + +-- +2.43.5 +
View file
_service:tar_scm:0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
Added
@@ -0,0 +1,195 @@ +From 8d5506a8aeb8dd7e8b209a3663b07688478f76b9 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:53 +0100 +Subject: PATCH aarch64: Measure reg_offset from the bottom of the frame + +reg_offset was measured from the bottom of the saved register area. +This made perfect sense with the original layout, since the bottom +of the saved register area was also the hard frame pointer address. +It became slightly less obvious with SVE, since we save SVE +registers below the hard frame pointer, but it still made sense. + +However, if we want to allow different frame layouts, it's more +convenient and obvious to measure reg_offset from the bottom of +the frame. After previous patches, it's also a slight simplification +in its own right. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame): Add comment above + reg_offset. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets + from the bottom of the frame, rather than the bottom of the saved + register area. Measure reg_offset from the bottom of the frame + rather than the bottom of the saved register area. + (aarch64_save_callee_saves): Update accordingly. + (aarch64_restore_callee_saves): Likewise. + (aarch64_get_separate_components): Likewise. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- + gcc/config/aarch64/aarch64.h | 3 ++ + 2 files changed, 27 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 3c4052740e7a..97dd077844b4 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8139,7 +8139,6 @@ aarch64_needs_frame_chain (void) + static void + aarch64_layout_frame (void) + { +- poly_int64 offset = 0; + int regno, last_fp_reg = INVALID_REGNUM; + machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); +@@ -8217,7 +8216,9 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + +- frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ poly_int64 offset = crtl->outgoing_args_size; ++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ frame.bytes_below_saved_regs = offset; + + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small +@@ -8229,7 +8230,8 @@ aarch64_layout_frame (void) + offset += BYTES_PER_SVE_PRED; + } + +- if (maybe_ne (offset, 0)) ++ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; ++ if (maybe_ne (saved_prs_size, 0)) + { + /* If we have any vector registers to save above the predicate registers, + the offset of the vector register save slots need to be a multiple +@@ -8247,10 +8249,10 @@ aarch64_layout_frame (void) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + else + { +- if (known_le (offset, vector_save_size)) +- offset = vector_save_size; +- else if (known_le (offset, vector_save_size * 2)) +- offset = vector_save_size * 2; ++ if (known_le (saved_prs_size, vector_save_size)) ++ offset = frame.bytes_below_saved_regs + vector_save_size; ++ else if (known_le (saved_prs_size, vector_save_size * 2)) ++ offset = frame.bytes_below_saved_regs + vector_save_size * 2; + else + gcc_unreachable (); + } +@@ -8267,9 +8269,10 @@ aarch64_layout_frame (void) + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ +- bool saves_below_hard_fp_p = maybe_ne (offset, 0); +- frame.below_hard_fp_saved_regs_size = offset; +- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; ++ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; ++ bool saves_below_hard_fp_p ++ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ frame.bytes_below_hard_fp = offset; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -8320,9 +8323,10 @@ aarch64_layout_frame (void) + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.saved_regs_size = offset; ++ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; + +- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; ++ poly_int64 varargs_and_saved_regs_size ++ = frame.saved_regs_size + frame.saved_varargs_size; + + poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size +@@ -8790,9 +8794,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -8899,9 +8901,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9040,14 +9040,12 @@ aarch64_get_separate_components (void) + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection + && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) +- && known_eq (offset, 0)) ++ && known_eq (offset, frame.bytes_below_saved_regs)) + continue; + + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9194,9 +9192,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + rtx reg = gen_rtx_REG (mode, regno); + poly_int64 offset = frame.reg_offsetregno; + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9248,9 +9244,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + /* REGNO2 can be saved/restored in a pair with REGNO. */ + rtx reg2 = gen_rtx_REG (mode, regno2); + if (frame_pointer_needed) +- offset2 -= frame.below_hard_fp_saved_regs_size; +- else +- offset2 += frame.bytes_below_saved_regs; ++ offset2 -= frame.bytes_below_hard_fp; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9366,7 +9360,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p + && known_eq (frame.below_hard_fp_saved_regs_size, 0)) + { +- poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; ++ poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM ++ - frame.bytes_below_saved_regs); + if (known_ge (lr_offset, 0)) + min_probe_threshold -= lr_offset.to_constant (); + else +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 700524ae22bf..b61358370732 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -860,6 +860,9 @@ extern enum aarch64_processor aarch64_tune; + #ifdef HAVE_POLY_INT_H + struct GTY (()) aarch64_frame + { ++ /* The offset from the bottom of the static frame (the bottom of the ++ outgoing arguments) of each register save slot, or -2 if no save is ++ needed. */ + poly_int64 reg_offsetLAST_SAVED_REGNUM + 1; + + /* The number of extra stack bytes taken up by register varargs. +-- +2.43.5 +
View file
_service:tar_scm:0108-aarch64-Simplify-top-of-frame-allocation.patch
Added
@@ -0,0 +1,55 @@ +From b47766614df3b9df878262efb2ad73aaac108363 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:53 +0100 +Subject: PATCH aarch64: Simplify top of frame allocation + +After previous patches, it no longer really makes sense to allocate +the top of the frame in terms of varargs_and_saved_regs_size and +saved_regs_and_above. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify + the allocation of the top of the frame. +--- + gcc/config/aarch64/aarch64.cc | 23 ++++++++--------------- + 1 file changed, 8 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 97dd077844b4..81935852d5b2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8325,23 +8325,16 @@ aarch64_layout_frame (void) + + frame.saved_regs_size = offset - frame.bytes_below_saved_regs; + +- poly_int64 varargs_and_saved_regs_size +- = frame.saved_regs_size + frame.saved_varargs_size; +- +- poly_int64 saved_regs_and_above +- = aligned_upper_bound (varargs_and_saved_regs_size +- + get_frame_size (), +- STACK_BOUNDARY / BITS_PER_UNIT); +- +- frame.bytes_above_hard_fp +- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; ++ offset += get_frame_size (); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ auto top_of_locals = offset; + +- /* Both these values are already aligned. */ +- gcc_assert (multiple_p (frame.bytes_below_saved_regs, +- STACK_BOUNDARY / BITS_PER_UNIT)); +- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; ++ offset += frame.saved_varargs_size; ++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ frame.frame_size = offset; + +- frame.bytes_above_locals = frame.saved_varargs_size; ++ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; ++ frame.bytes_above_locals = frame.frame_size - top_of_locals; + + frame.initial_adjust = 0; + frame.final_adjust = 0; +-- +2.43.5 +
View file
_service:tar_scm:0109-aarch64-Minor-initial-adjustment-tweak.patch
Added
@@ -0,0 +1,38 @@ +From 08f71b4bb28fb74d20e8d2927a557e8119ce9f4d Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:54 +0100 +Subject: PATCH aarch64: Minor initial adjustment tweak + +This patch just changes a calculation of initial_adjust +to one that makes it slightly more obvious that the total +adjustment is frame.frame_size. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak + calculation of initial_adjust for frames in which all saves + are SVE saves. +--- + gcc/config/aarch64/aarch64.cc | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 81935852d5b2..4d9fcf3d1623 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8414,11 +8414,10 @@ aarch64_layout_frame (void) + { + /* Frame in which all saves are SVE saves: + +- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size ++ sub sp, sp, frame_size - bytes_below_saved_regs + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = (frame.bytes_above_hard_fp +- + frame.below_hard_fp_saved_regs_size); ++ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; + frame.final_adjust = frame.bytes_below_saved_regs; + } + else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) +-- +2.43.5 +
View file
_service:tar_scm:0110-aarch64-Tweak-stack-clash-boundary-condition.patch
Added
@@ -0,0 +1,125 @@ +From f22315d5c19e8310e4dc880fd509678fd291fca8 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:54 +0100 +Subject: PATCH aarch64: Tweak stack clash boundary condition + +The AArch64 ABI says that, when stack clash protection is used, +there can be a maximum of 1KiB of unprobed space at sp on entry +to a function. Therefore, we need to probe when allocating +>= guard_size - 1KiB of data (>= rather than >). This is what +GCC does. + +If an allocation is exactly guard_size bytes, it is enough to allocate +those bytes and probe once at offset 1024. It isn't possible to use a +single probe at any other offset: higher would conmplicate later code, +by leaving more unprobed space than usual, while lower would risk +leaving an entire page unprobed. For simplicity, the code probes all +allocations at offset 1024. + +Some register saves also act as probes. If we need to allocate +more space below the last such register save probe, we need to +probe the allocation if it is > 1KiB. Again, this allocation is +then sometimes (but not always) probed at offset 1024. This sort of +allocation is currently only used for outgoing arguments, which are +rarely this big. + +However, the code also probed if this final outgoing-arguments +allocation was == 1KiB, rather than just > 1KiB. This isn't +necessary, since the register save then probes at offset 1024 +as required. Continuing to probe allocations of exactly 1KiB +would complicate later patches. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): + Don't probe final allocations that are exactly 1KiB in size (after + unprobed space above the final allocation has been deducted). + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-17.c: New test. +--- + gcc/config/aarch64/aarch64.cc | 4 +- + .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++ + 2 files changed, 58 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4d9fcf3d1623..34c1d8614cd9 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9333,9 +9333,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + HOST_WIDE_INT guard_size + = 1 << param_stack_clash_protection_guard_size; + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; ++ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; ++ gcc_assert (multiple_p (poly_size, byte_sp_alignment)); + HOST_WIDE_INT min_probe_threshold + = (final_adjustment_p +- ? guard_used_by_caller ++ ? guard_used_by_caller + byte_sp_alignment + : guard_size - guard_used_by_caller); + /* When doing the final adjustment for the outgoing arguments, take into + account any unprobed space there is above the current SP. There are +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +new file mode 100644 +index 000000000000..0d8a25d73a24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +@@ -0,0 +1,55 @@ ++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void f(int, ...); ++void g(); ++ ++/* ++** test1: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1024 ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test1(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test2: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1040 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test2(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x); ++ } ++ g(); ++ return 1; ++} +-- +2.43.5 +
View file
_service:tar_scm:0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
Added
@@ -0,0 +1,406 @@ +From 15e18831bf98fd25af098b970ebf0c9a6200a34b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:55 +0100 +Subject: PATCH aarch64: Put LR save probe in first 16 bytes + +-fstack-clash-protection uses the save of LR as a probe for the next +allocation. The next allocation could be: + +* another part of the static frame, e.g. when allocating SVE save slots + or outgoing arguments + +* an alloca in the same function + +* an allocation made by a callee function + +However, when -fomit-frame-pointer is used, the LR save slot is placed +above the other GPR save slots. It could therefore be up to 80 bytes +above the base of the GPR save area (which is also the hard fp address). + +aarch64_allocate_and_probe_stack_space took this into account when +deciding how much subsequent space could be allocated without needing +a probe. However, it interacted badly with: + + /* If doing a small final adjustment, we always probe at offset 0. + This is done to avoid issues when LR is not at position 0 or when + the final adjustment is smaller than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + +which forces any allocation that is smaller than the guard page size +to be probed at offset 0 rather than the usual offset 1024. It was +therefore possible to construct cases in which we had: + +* a probe using LR at SP + 80 bytes (or some other value >= 16) +* an allocation of the guard page size - 16 bytes +* a probe at SP + 0 + +which allocates guard page size + 64 consecutive unprobed bytes. + +This patch requires the LR probe to be in the first 16 bytes of the +save area when stack clash protection is active. Doing it +unconditionally would cause code-quality regressions. + +Putting LR before other registers prevents push/pop allocation +when shadow call stacks are enabled, since LR is restored +separately from the other callee-saved registers. + +The new comment doesn't say that the probe register is required +to be LR, since a later patch removes that restriction. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that + the LR save slot is in the first 16 bytes of the register save area. + Only form STP/LDP push/pop candidates if both registers are valid. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + when LR was not in the first 16 bytes. + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-18.c: New test. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 72 ++++++------- + .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++ + .../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++ + .../aarch64/stack-check-prologue-20.c | 3 + + 4 files changed, 233 insertions(+), 42 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 34c1d8614cd9..16433fb70f4f 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8273,26 +8273,34 @@ aarch64_layout_frame (void) + bool saves_below_hard_fp_p + = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); + frame.bytes_below_hard_fp = offset; ++ ++ auto allocate_gpr_slot = &(unsigned int regno) ++ { ++ frame.reg_offsetregno = offset; ++ if (frame.wb_push_candidate1 == INVALID_REGNUM) ++ frame.wb_push_candidate1 = regno; ++ else if (frame.wb_push_candidate2 == INVALID_REGNUM) ++ frame.wb_push_candidate2 = regno; ++ offset += UNITS_PER_WORD; ++ }; ++ + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +- frame.reg_offsetR29_REGNUM = offset; +- frame.wb_push_candidate1 = R29_REGNUM; +- frame.reg_offsetR30_REGNUM = offset + UNITS_PER_WORD; +- frame.wb_push_candidate2 = R30_REGNUM; +- offset += 2 * UNITS_PER_WORD; ++ allocate_gpr_slot (R29_REGNUM); ++ allocate_gpr_slot (R30_REGNUM); + } ++ else if (flag_stack_clash_protection ++ && known_eq (frame.reg_offsetR30_REGNUM, SLOT_REQUIRED)) ++ /* Put the LR save slot first, since it makes a good choice of probe ++ for stack clash purposes. The idea is that the link register usually ++ has to be saved before a call anyway, and so we lose little by ++ stopping it from being individually shrink-wrapped. */ ++ allocate_gpr_slot (R30_REGNUM); + + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) +- { +- frame.reg_offsetregno = offset; +- if (frame.wb_push_candidate1 == INVALID_REGNUM) +- frame.wb_push_candidate1 = regno; +- else if (frame.wb_push_candidate2 == INVALID_REGNUM) +- frame.wb_push_candidate2 = regno; +- offset += UNITS_PER_WORD; +- } ++ allocate_gpr_slot (regno); + + poly_int64 max_int_offset = offset; + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +@@ -8370,10 +8378,13 @@ aarch64_layout_frame (void) + max_push_offset to 0, because no registers are popped at this time, + so callee_adjust cannot be adjusted. */ + HOST_WIDE_INT max_push_offset = 0; +- if (frame.wb_pop_candidate2 != INVALID_REGNUM) +- max_push_offset = 512; +- else if (frame.wb_pop_candidate1 != INVALID_REGNUM) +- max_push_offset = 256; ++ if (frame.wb_pop_candidate1 != INVALID_REGNUM) ++ { ++ if (frame.wb_pop_candidate2 != INVALID_REGNUM) ++ max_push_offset = 512; ++ else ++ max_push_offset = 256; ++ } + + HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; +@@ -9339,29 +9350,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + = (final_adjustment_p + ? guard_used_by_caller + byte_sp_alignment + : guard_size - guard_used_by_caller); +- /* When doing the final adjustment for the outgoing arguments, take into +- account any unprobed space there is above the current SP. There are +- two cases: +- +- - When saving SVE registers below the hard frame pointer, we force +- the lowest save to take place in the prologue before doing the final +- adjustment (i.e. we don't allow the save to be shrink-wrapped). +- This acts as a probe at SP, so there is no unprobed space. +- +- - When there are no SVE register saves, we use the store of the link +- register as a probe. We can't assume that LR was saved at position 0 +- though, so treat any space below it as unprobed. */ +- if (final_adjustment_p +- && known_eq (frame.below_hard_fp_saved_regs_size, 0)) +- { +- poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM +- - frame.bytes_below_saved_regs); +- if (known_ge (lr_offset, 0)) +- min_probe_threshold -= lr_offset.to_constant (); +- else +- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); +- } +- + poly_int64 frame_size = frame.frame_size; + + /* We should always have a positive probe threshold. */ +@@ -9541,8 +9529,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p && rounded_size != 0) + min_probe_threshold = 0; + /* If doing a small final adjustment, we always probe at offset 0. +- This is done to avoid issues when LR is not at position 0 or when +- the final adjustment is smaller than the probing offset. */ ++ This is done to avoid issues when the final adjustment is smaller ++ than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +new file mode 100644 +index 000000000000..82447d20fff5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +@@ -0,0 +1,100 @@ ++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void f(int, ...); ++void g(); ++ ++/* ++** test1: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #4064 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++** str x26, \sp, #?4128\ ++** ... ++*/ ++int test1(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test2: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1040 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test2(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test3: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1024 ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test3(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +new file mode 100644 +index 000000000000..73ac3e4e4eb0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +@@ -0,0 +1,100 @@ ++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void f(int, ...); ++void g(); ++ ++/* ++** test1: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #4064 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++** str x26, \sp, #?4128\ ++** ... ++*/ ++int test1(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test2: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1040 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test2(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test3: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1024 ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test3(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c +new file mode 100644 +index 000000000000..690aae8dfd5b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c +@@ -0,0 +1,3 @@ ++/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ ++ ++#include "stack-check-prologue-19.c" +-- +2.43.5 +
View file
_service:tar_scm:0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
Added
@@ -0,0 +1,123 @@ +From c4f0e121faa36342f1d21919e54a05ad841c4f86 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:55 +0100 +Subject: PATCH aarch64: Simplify probe of final frame allocation + +Previous patches ensured that the final frame allocation only needs +a probe when the size is strictly greater than 1KiB. It's therefore +safe to use the normal 1024 probe offset in all cases. + +The main motivation for doing this is to simplify the code and +remove the number of special cases. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): + Always probe the residual allocation at offset 1024, asserting + that that is in range. + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe + to be at offset 1024 rather than offset 0. + * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 12 ++++-------- + .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +- + .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++-- + .../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++-- + 4 files changed, 9 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 16433fb70f4f..8abf3d7a1e2b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9523,16 +9523,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + are still safe. */ + if (residual) + { +- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller; ++ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size); ++ + /* If we're doing final adjustments, and we've done any full page + allocations then any residual needs to be probed. */ + if (final_adjustment_p && rounded_size != 0) + min_probe_threshold = 0; +- /* If doing a small final adjustment, we always probe at offset 0. +- This is done to avoid issues when the final adjustment is smaller +- than the probing offset. */ +- else if (final_adjustment_p && rounded_size == 0) +- residual_probe_offset = 0; + + aarch64_sub_sp (temp1, temp2, residual, frame_related_p); + if (residual >= min_probe_threshold) +@@ -9543,8 +9539,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required." + "\n", residual); + +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- residual_probe_offset)); ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ guard_used_by_caller)); + emit_insn (gen_blockage ()); + } + } +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +index 0d8a25d73a24..f0ec1389771d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +@@ -33,7 +33,7 @@ int test1(int z) { + ** ... + ** str x30, \sp\ + ** sub sp, sp, #1040 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +index 82447d20fff5..6383bec5ebcd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +@@ -9,7 +9,7 @@ void g(); + ** ... + ** str x30, \sp\ + ** sub sp, sp, #4064 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +@@ -50,7 +50,7 @@ int test1(int z) { + ** ... + ** str x30, \sp\ + ** sub sp, sp, #1040 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +index 73ac3e4e4eb0..562039b5e9b8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +@@ -9,7 +9,7 @@ void g(); + ** ... + ** str x30, \sp\ + ** sub sp, sp, #4064 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +@@ -50,7 +50,7 @@ int test1(int z) { + ** ... + ** str x30, \sp\ + ** sub sp, sp, #1040 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +-- +2.43.5 +
View file
_service:tar_scm:0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
Added
@@ -0,0 +1,277 @@ +From 6f0ab0a9f46a17b68349ff6035aa776bf65f0575 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:56 +0100 +Subject: PATCH aarch64: Explicitly record probe registers in frame info + +The stack frame is currently divided into three areas: + +A: the area above the hard frame pointer +B: the SVE saves below the hard frame pointer +C: the outgoing arguments + +If the stack frame is allocated in one chunk, the allocation needs a +probe if the frame size is >= guard_size - 1KiB. In addition, if the +function is not a leaf function, it must probe an address no more than +1KiB above the outgoing SP. We ensured the second condition by + +(1) using single-chunk allocations for non-leaf functions only if + the link register save slot is within 512 bytes of the bottom + of the frame; and + +(2) using the link register save as a probe (meaning, for instance, + that it can't be individually shrink wrapped) + +If instead the stack is allocated in multiple chunks, then: + +* an allocation involving only the outgoing arguments (C above) requires + a probe if the allocation size is > 1KiB + +* any other allocation requires a probe if the allocation size + is >= guard_size - 1KiB + +* second and subsequent allocations require the previous allocation + to probe at the bottom of the allocated area, regardless of the size + of that previous allocation + +The final point means that, unlike for single allocations, +it can be necessary to have both a non-SVE register probe and +an SVE register probe. For example: + +* allocate A, probe using a non-SVE register save +* allocate B, probe using an SVE register save +* allocate C + +The non-SVE register used in this case was again the link register. +It was previously used even if the link register save slot was some +bytes above the bottom of the non-SVE register saves, but an earlier +patch avoided that by putting the link register save slot first. + +As a belt-and-braces fix, this patch explicitly records which +probe registers we're using and allows the non-SVE probe to be +whichever register comes first (as for SVE). + +The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) + (aarch64_frame::hard_fp_save_and_probe): New fields. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. + Rather than asserting that a leaf function saves LR, instead assert + that a leaf function saves something. + (aarch64_get_separate_components): Prevent the chosen probe + registers from being individually shrink-wrapped. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + probe registers that aren't at the bottom of the previous allocation. + +gcc/testsuite/ + * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. +--- + gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++---- + gcc/config/aarch64/aarch64.h | 8 +++ + .../aarch64/sve/pcs/stack_clash_3.c | 6 +- + 3 files changed, 64 insertions(+), 18 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8abf3d7a1e2b..a8d907df8843 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8210,15 +8210,11 @@ aarch64_layout_frame (void) + && !crtl->abi->clobbers_full_reg_p (regno)) + frame.reg_offsetregno = SLOT_REQUIRED; + +- /* With stack-clash, LR must be saved in non-leaf functions. The saving of +- LR counts as an implicit probe which allows us to maintain the invariant +- described in the comment at expand_prologue. */ +- gcc_assert (crtl->is_leaf +- || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + + poly_int64 offset = crtl->outgoing_args_size; + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); + frame.bytes_below_saved_regs = offset; ++ frame.sve_save_and_probe = INVALID_REGNUM; + + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small +@@ -8226,6 +8222,8 @@ aarch64_layout_frame (void) + for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ if (frame.sve_save_and_probe == INVALID_REGNUM) ++ frame.sve_save_and_probe = regno; + frame.reg_offsetregno = offset; + offset += BYTES_PER_SVE_PRED; + } +@@ -8263,6 +8261,8 @@ aarch64_layout_frame (void) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ if (frame.sve_save_and_probe == INVALID_REGNUM) ++ frame.sve_save_and_probe = regno; + frame.reg_offsetregno = offset; + offset += vector_save_size; + } +@@ -8272,10 +8272,18 @@ aarch64_layout_frame (void) + frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; + bool saves_below_hard_fp_p + = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ gcc_assert (!saves_below_hard_fp_p ++ || (frame.sve_save_and_probe != INVALID_REGNUM ++ && known_eq (frame.reg_offsetframe.sve_save_and_probe, ++ frame.bytes_below_saved_regs))); ++ + frame.bytes_below_hard_fp = offset; ++ frame.hard_fp_save_and_probe = INVALID_REGNUM; + + auto allocate_gpr_slot = &(unsigned int regno) + { ++ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) ++ frame.hard_fp_save_and_probe = regno; + frame.reg_offsetregno = offset; + if (frame.wb_push_candidate1 == INVALID_REGNUM) + frame.wb_push_candidate1 = regno; +@@ -8309,6 +8317,8 @@ aarch64_layout_frame (void) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) ++ frame.hard_fp_save_and_probe = regno; + /* If there is an alignment gap between integer and fp callee-saves, + allocate the last fp register to it if possible. */ + if (regno == last_fp_reg +@@ -8332,6 +8342,17 @@ aarch64_layout_frame (void) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + + frame.saved_regs_size = offset - frame.bytes_below_saved_regs; ++ gcc_assert (known_eq (frame.saved_regs_size, ++ frame.below_hard_fp_saved_regs_size) ++ || (frame.hard_fp_save_and_probe != INVALID_REGNUM ++ && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, ++ frame.bytes_below_hard_fp))); ++ ++ /* With stack-clash, a register must be saved in non-leaf functions. ++ The saving of the bottommost register counts as an implicit probe, ++ which allows us to maintain the invariant described in the comment ++ at expand_prologue. */ ++ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); + + offset += get_frame_size (); + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +@@ -8462,6 +8483,25 @@ aarch64_layout_frame (void) + frame.final_adjust = frame.bytes_below_saved_regs; + } + ++ /* The frame is allocated in pieces, with each non-final piece ++ including a register save at offset 0 that acts as a probe for ++ the following piece. In addition, the save of the bottommost register ++ acts as a probe for callees and allocas. Roll back any probes that ++ aren't needed. ++ ++ A probe isn't needed if it is associated with the final allocation ++ (including callees and allocas) that happens before the epilogue is ++ executed. */ ++ if (crtl->is_leaf ++ && !cfun->calls_alloca ++ && known_eq (frame.final_adjust, 0)) ++ { ++ if (maybe_ne (frame.sve_callee_adjust, 0)) ++ frame.sve_save_and_probe = INVALID_REGNUM; ++ else ++ frame.hard_fp_save_and_probe = INVALID_REGNUM; ++ } ++ + /* Make sure the individual adjustments add up to the full frame size. */ + gcc_assert (known_eq (frame.initial_adjust + + frame.callee_adjust +@@ -9039,13 +9079,6 @@ aarch64_get_separate_components (void) + + poly_int64 offset = frame.reg_offsetregno; + +- /* If the register is saved in the first SVE save slot, we use +- it as a stack probe for -fstack-clash-protection. */ +- if (flag_stack_clash_protection +- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) +- && known_eq (offset, frame.bytes_below_saved_regs)) +- continue; +- + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) + offset -= frame.bytes_below_hard_fp; +@@ -9080,6 +9113,13 @@ aarch64_get_separate_components (void) + + bitmap_clear_bit (components, LR_REGNUM); + bitmap_clear_bit (components, SP_REGNUM); ++ if (flag_stack_clash_protection) ++ { ++ if (frame.sve_save_and_probe != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.sve_save_and_probe); ++ if (frame.hard_fp_save_and_probe != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.hard_fp_save_and_probe); ++ } + + return components; + } +@@ -9616,8 +9656,8 @@ aarch64_epilogue_uses (int regno) + When probing is needed, we emit a probe at the start of the prologue + and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter. + +- We have to track how much space has been allocated and the only stores +- to the stack we track as implicit probes are the FP/LR stores. ++ We can also use register saves as probes. These are stored in ++ sve_save_and_probe and hard_fp_save_and_probe. + + For outgoing arguments we probe if the size is larger than 1KB, such that + the ABI specified buffer is maintained for the next callee. +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index b61358370732..46d4693e2064 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -957,6 +957,14 @@ struct GTY (()) aarch64_frame + This is the register they should use. */ + unsigned spare_pred_reg; + ++ /* An SVE register that is saved below the hard frame pointer and that acts ++ as a probe for later allocations, or INVALID_REGNUM if none. */ ++ unsigned sve_save_and_probe; ++ ++ /* A register that is saved at the hard frame pointer and that acts ++ as a probe for later allocations, or INVALID_REGNUM if none. */ ++ unsigned hard_fp_save_and_probe; ++ + bool laid_out; + + /* True if shadow call stack should be enabled for the current function. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +index 3e01ec36c3a4..3530a0d504ba 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +@@ -11,11 +11,10 @@ + ** mov x11, sp + ** ... + ** sub sp, sp, x13 +-** str p4, \sp\ + ** cbz w0, ^\n* ++** str p4, \sp\ + ** ... + ** ptrue p0\.b, all +-** ldr p4, \sp\ + ** addvl sp, sp, #1 + ** ldr x24, \sp\, 32 + ** ret +@@ -39,13 +38,12 @@ test_1 (int n) + ** mov x11, sp + ** ... + ** sub sp, sp, x13 +-** str p4, \sp\ + ** cbz w0, ^\n* ++** str p4, \sp\ + ** str p5, \sp, #1, mul vl\ + ** str p6, \sp, #2, mul vl\ + ** ... + ** ptrue p0\.b, all +-** ldr p4, \sp\ + ** addvl sp, sp, #1 + ** ldr x24, \sp\, 32 + ** ret +-- +2.43.5 +
View file
_service:tar_scm:0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
Added
@@ -0,0 +1,157 @@ +From 8254e1b9cd500e0c278465a3657543477e9d1250 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:56 +0100 +Subject: PATCH aarch64: Remove below_hard_fp_saved_regs_size + +After previous patches, it's no longer necessary to store +saved_regs_size and below_hard_fp_saved_regs_size in the frame info. +All measurements instead use the top or bottom of the frame as +reference points. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size) + (aarch64_frame::below_hard_fp_saved_regs_size): Delete. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly. +--- + gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++------------------- + gcc/config/aarch64/aarch64.h | 7 ------ + 2 files changed, 21 insertions(+), 31 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index a8d907df8843..ac3d3b336a37 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8269,9 +8269,8 @@ aarch64_layout_frame (void) + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ +- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; +- bool saves_below_hard_fp_p +- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; ++ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0); + gcc_assert (!saves_below_hard_fp_p + || (frame.sve_save_and_probe != INVALID_REGNUM + && known_eq (frame.reg_offsetframe.sve_save_and_probe, +@@ -8341,9 +8340,8 @@ aarch64_layout_frame (void) + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.saved_regs_size = offset - frame.bytes_below_saved_regs; +- gcc_assert (known_eq (frame.saved_regs_size, +- frame.below_hard_fp_saved_regs_size) ++ auto saved_regs_size = offset - frame.bytes_below_saved_regs; ++ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) + || (frame.hard_fp_save_and_probe != INVALID_REGNUM + && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, + frame.bytes_below_hard_fp))); +@@ -8352,7 +8350,7 @@ aarch64_layout_frame (void) + The saving of the bottommost register counts as an implicit probe, + which allows us to maintain the invariant described in the comment + at expand_prologue. */ +- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); ++ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); + + offset += get_frame_size (); + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +@@ -8409,7 +8407,7 @@ aarch64_layout_frame (void) + + HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; +- if (known_eq (frame.saved_regs_size, 0)) ++ if (known_eq (saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; + else if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +@@ -8422,7 +8420,7 @@ aarch64_layout_frame (void) + frame.callee_adjust = const_size; + } + else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) +- && frame.saved_regs_size.is_constant (&const_saved_regs_size) ++ && saved_regs_size.is_constant (&const_saved_regs_size) + && const_below_saved_regs + const_saved_regs_size < 512 + /* We could handle this case even with data below the saved + registers, provided that that data left us with valid offsets +@@ -8441,8 +8439,7 @@ aarch64_layout_frame (void) + frame.initial_adjust = frame.frame_size; + } + else if (saves_below_hard_fp_p +- && known_eq (frame.saved_regs_size, +- frame.below_hard_fp_saved_regs_size)) ++ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size)) + { + /* Frame in which all saves are SVE saves: + +@@ -8464,7 +8461,7 @@ aarch64_layout_frame (void) + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ + frame.callee_adjust = const_above_fp; +- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; ++ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } + else +@@ -8479,7 +8476,7 @@ aarch64_layout_frame (void) + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = frame.bytes_above_hard_fp; +- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; ++ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } + +@@ -9621,17 +9618,17 @@ aarch64_epilogue_uses (int regno) + | local variables | <-- frame_pointer_rtx + | | + +-------------------------------+ +- | padding | \ +- +-------------------------------+ | +- | callee-saved registers | | frame.saved_regs_size +- +-------------------------------+ | +- | LR' | | +- +-------------------------------+ | +- | FP' | | +- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) +- | SVE vector registers | | \ +- +-------------------------------+ | | below_hard_fp_saved_regs_size +- | SVE predicate registers | / / ++ | padding | ++ +-------------------------------+ ++ | callee-saved registers | ++ +-------------------------------+ ++ | LR' | ++ +-------------------------------+ ++ | FP' | ++ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned) ++ | SVE vector registers | ++ +-------------------------------+ ++ | SVE predicate registers | + +-------------------------------+ + | dynamic allocation | + +-------------------------------+ +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 46d4693e2064..01f7751bc783 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -871,18 +871,11 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + HOST_WIDE_INT saved_varargs_size; + +- /* The size of the callee-save registers with a slot in REG_OFFSET. */ +- poly_int64 saved_regs_size; +- + /* The number of bytes between the bottom of the static frame (the bottom + of the outgoing arguments) and the bottom of the register save area. + This value is always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_saved_regs; + +- /* The size of the callee-save registers with a slot in REG_OFFSET that +- are saved below the hard frame pointer. */ +- poly_int64 below_hard_fp_saved_regs_size; +- + /* The number of bytes between the bottom of the static frame (the bottom + of the outgoing arguments) and the hard frame pointer. This value is + always a multiple of STACK_BOUNDARY. */ +-- +2.43.5 +
View file
_service:tar_scm:0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
Added
@@ -0,0 +1,298 @@ +From 75c37e031408262263442f5b4cdb83d3777b6422 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:08:57 +0100 +Subject: PATCH aarch64: Make stack smash canary protect saved registers + +AArch64 normally puts the saved registers near the bottom of the frame, +immediately above any dynamic allocations. But this means that a +stack-smash attack on those dynamic allocations could overwrite the +saved registers without needing to reach as far as the stack smash +canary. + +The same thing could also happen for variable-sized arguments that are +passed by value, since those are allocated before a call and popped on +return. + +This patch avoids that by putting the locals (and thus the canary) below +the saved registers when stack smash protection is active. + +The patch fixes CVE-2023-4039. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p): + New function. + (aarch64_layout_frame): Use it to decide whether locals should + go above or below the saved registers. + (aarch64_expand_prologue): Update stack layout comment. + Emit a stack tie after the final adjustment. + +gcc/testsuite/ + * gcc.target/aarch64/stack-protector-8.c: New test. + * gcc.target/aarch64/stack-protector-9.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 46 +++++++-- + .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++ + .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++ + 3 files changed, 168 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index ac3d3b336a37..96c3f48fdc49 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8133,6 +8133,20 @@ aarch64_needs_frame_chain (void) + return aarch64_use_frame_pointer; + } + ++/* Return true if the current function should save registers above ++ the locals area, rather than below it. */ ++ ++static bool ++aarch64_save_regs_above_locals_p () ++{ ++ /* When using stack smash protection, make sure that the canary slot ++ comes between the locals and the saved registers. Otherwise, ++ it would be possible for a carefully sized smash attack to change ++ the saved registers (particularly LR and FP) without reaching the ++ canary. */ ++ return crtl->stack_protect_guard; ++} ++ + /* Mark the registers that need to be saved by the callee and calculate + the size of the callee-saved registers area and frame record (both FP + and LR may be omitted). */ +@@ -8144,6 +8158,7 @@ aarch64_layout_frame (void) + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); + bool frame_related_fp_reg_p = false; + aarch64_frame &frame = cfun->machine->frame; ++ poly_int64 top_of_locals = -1; + + frame.emit_frame_chain = aarch64_needs_frame_chain (); + +@@ -8210,9 +8225,16 @@ aarch64_layout_frame (void) + && !crtl->abi->clobbers_full_reg_p (regno)) + frame.reg_offsetregno = SLOT_REQUIRED; + ++ bool regs_at_top_p = aarch64_save_regs_above_locals_p (); + + poly_int64 offset = crtl->outgoing_args_size; + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ if (regs_at_top_p) ++ { ++ offset += get_frame_size (); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ top_of_locals = offset; ++ } + frame.bytes_below_saved_regs = offset; + frame.sve_save_and_probe = INVALID_REGNUM; + +@@ -8352,15 +8374,18 @@ aarch64_layout_frame (void) + at expand_prologue. */ + gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); + +- offset += get_frame_size (); +- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +- auto top_of_locals = offset; +- ++ if (!regs_at_top_p) ++ { ++ offset += get_frame_size (); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ top_of_locals = offset; ++ } + offset += frame.saved_varargs_size; + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); + frame.frame_size = offset; + + frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; ++ gcc_assert (known_ge (top_of_locals, 0)); + frame.bytes_above_locals = frame.frame_size - top_of_locals; + + frame.initial_adjust = 0; +@@ -9615,10 +9640,10 @@ aarch64_epilogue_uses (int regno) + | for register varargs | + | | + +-------------------------------+ +- | local variables | <-- frame_pointer_rtx ++ | local variables (1) | <-- frame_pointer_rtx + | | + +-------------------------------+ +- | padding | ++ | padding (1) | + +-------------------------------+ + | callee-saved registers | + +-------------------------------+ +@@ -9630,6 +9655,10 @@ aarch64_epilogue_uses (int regno) + +-------------------------------+ + | SVE predicate registers | + +-------------------------------+ ++ | local variables (2) | ++ +-------------------------------+ ++ | padding (2) | ++ +-------------------------------+ + | dynamic allocation | + +-------------------------------+ + | padding | +@@ -9639,6 +9668,9 @@ aarch64_epilogue_uses (int regno) + +-------------------------------+ + | | <-- stack_pointer_rtx (aligned) + ++ The regions marked (1) and (2) are mutually exclusive. (2) is used ++ when aarch64_save_regs_above_locals_p is true. ++ + Dynamic stack allocations via alloca() decrease stack_pointer_rtx + but leave frame_pointer_rtx and hard_frame_pointer_rtx + unchanged. +@@ -9834,6 +9866,8 @@ aarch64_expand_prologue (void) + gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); ++ if (emit_frame_chain && maybe_ne (final_adjust, 0)) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + } + + /* Return TRUE if we can use a simple_return insn. +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c +new file mode 100644 +index 000000000000..e71d820e3654 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c +@@ -0,0 +1,95 @@ ++/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void g(void *); ++__SVBool_t *h(void *); ++ ++/* ++** test1: ++** sub sp, sp, #288 ++** stp x29, x30, \sp, #?272\ ++** add x29, sp, #?272 ++** mrs (x0-9+), tpidr2_el0 ++** ldr (x0-9+), \\1, #?16\ ++** str \2, \sp, #?264\ ++** mov \2, #?0 ++** add x0, sp, #?8 ++** bl g ++** ... ++** mrs .* ++** ... ++** bne .* ++** ... ++** ldp x29, x30, \sp, #?272\ ++** add sp, sp, #?288 ++** ret ++** bl __stack_chk_fail ++*/ ++int test1() { ++ int y0x40; ++ g(y); ++ return 1; ++} ++ ++/* ++** test2: ++** stp x29, x30, \sp, #?-16\! ++** mov x29, sp ++** sub sp, sp, #1040 ++** mrs (x0-9+), tpidr2_el0 ++** ldr (x0-9+), \\1, #?16\ ++** str \2, \sp, #?1032\ ++** mov \2, #?0 ++** add x0, sp, #?8 ++** bl g ++** ... ++** mrs .* ++** ... ++** bne .* ++** ... ++** add sp, sp, #?1040 ++** ldp x29, x30, \sp\, #?16 ++** ret ++** bl __stack_chk_fail ++*/ ++int test2() { ++ int y0x100; ++ g(y); ++ return 1; ++} ++ ++#pragma GCC target "+sve" ++ ++/* ++** test3: ++** stp x29, x30, \sp, #?-16\! ++** mov x29, sp ++** addvl sp, sp, #-18 ++** ... ++** str p4, \sp\ ++** ... ++** sub sp, sp, #272 ++** mrs (x0-9+), tpidr2_el0 ++** ldr (x0-9+), \\1, #?16\ ++** str \2, \sp, #?264\ ++** mov \2, #?0 ++** add x0, sp, #?8 ++** bl h ++** ... ++** mrs .* ++** ... ++** bne .* ++** ... ++** add sp, sp, #?272 ++** ... ++** ldr p4, \sp\ ++** ... ++** addvl sp, sp, #18 ++** ldp x29, x30, \sp\, #?16 ++** ret ++** bl __stack_chk_fail ++*/ ++__SVBool_t test3() { ++ int y0x40; ++ return *h(y); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c +new file mode 100644 +index 000000000000..58f322aa480a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c +@@ -0,0 +1,33 @@ ++/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++** main: ++** ... ++** stp x29, x30, \sp, #?-0-9+\! ++** ... ++** sub sp, sp, #0-9+ ++** ... ++** str x0-9+, \x29, #?-8\ ++** ... ++*/ ++int f(const char *); ++void g(void *); ++int main(int argc, char* argv) ++{ ++ int a; ++ int b; ++ char c2+f(argv1); ++ int d0x100; ++ char y; ++ ++ y=42; a=4; b=10; ++ c0 = 'h'; c1 = '\0'; ++ ++ cf(argv2) = '\0'; ++ ++ __builtin_printf("%d %d\n%s\n", a, b, c); ++ g(d); ++ ++ return 0; ++} +-- +2.43.5 +
View file
_service:tar_scm:0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
Added
@@ -0,0 +1,66 @@ +From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 20 Sep 2023 11:13:19 +0100 +Subject: PATCH aarch64: Fix return register handling in untyped_call + +While working on another patch, I hit a problem with the aarch64 +expansion of untyped_call. The expander emits the usual: + + (set (mem ...) (reg resN)) + +instructions to store the result registers to memory, but it didn't +say in RTL where those resN results came from. This eventually led +to a failure of gcc.dg/torture/stackalign/builtin-return-2.c, +via regrename. + +This patch turns the untyped call from a plain call to a call_value, +to represent that the call returns (or might return) a useful value. +The patch also uses a PARALLEL return rtx to represent all the possible +return registers. + +gcc/ + * config/aarch64/aarch64.md (untyped_call): Emit a call_value + rather than a call. List each possible destination register + in the call pattern. + +(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f) +--- + gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index b9b3ba4f9164..cd568f3daa0f 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1159,9 +1159,27 @@ + { + int i; + ++ /* Generate a PARALLEL that contains all of the register results. ++ The offsets are somewhat arbitrary, since we don't know the ++ actual return type. The main thing we need to avoid is having ++ overlapping byte ranges, since those might give the impression ++ that two registers are known to have data in common. */ ++ rtvec rets = rtvec_alloc (XVECLEN (operands2, 0)); ++ poly_int64 offset = 0; ++ for (i = 0; i < XVECLEN (operands2, 0); i++) ++ { ++ rtx reg = SET_SRC (XVECEXP (operands2, 0, i)); ++ gcc_assert (REG_P (reg)); ++ rtx offset_rtx = gen_int_mode (offset, Pmode); ++ rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx); ++ RTVEC_ELT (rets, i) = piece; ++ offset += GET_MODE_SIZE (GET_MODE (reg)); ++ } ++ rtx ret = gen_rtx_PARALLEL (VOIDmode, rets); ++ + /* Untyped calls always use the default ABI. It's only possible to use + ABI variants if we know the type of the target function. */ +- emit_call_insn (gen_call (operands0, const0_rtx, const0_rtx)); ++ emit_call_insn (gen_call_value (ret, operands0, const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands2, 0); i++) + { +-- +2.43.5 +
View file
_service:tar_scm:0117-aarch64-Fix-loose-ldpstp-check.patch
Added
@@ -0,0 +1,119 @@ +From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 20 Sep 2023 11:13:20 +0100 +Subject: PATCH aarch64: Fix loose ldpstp check PR111411 + +aarch64_operands_ok_for_ldpstp contained the code: + + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. */ + if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) + && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) + return false; + +But the requirement isn't just that one of the accesses must be a +valid mempair operand. It's that the lower access must be, since +that's the access that will be used for the instruction operand. + +gcc/ + PR target/111411 + * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require + the lower memory access to a mem-pair operand. + +gcc/testsuite/ + PR target/111411 + * gcc.dg/rtl/aarch64/pr111411.c: New test. + +(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) +--- + gcc/config/aarch64/aarch64.cc | 8 ++- + gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ + 2 files changed, 60 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 96c3f48fdc49..a979accd90a9 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, + gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), + GET_MODE_SIZE (GET_MODE (mem_2)))); + +- /* One of the memory accesses must be a mempair operand. +- If it is not the first one, they need to be swapped by the +- peephole. */ +- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) +- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) ++ /* The lower memory access must be a mem-pair operand. */ ++ rtx lower_mem = reversed ? mem_2 : mem_1; ++ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) + return false; + + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +new file mode 100644 +index 000000000000..ad07e9c6c893 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +@@ -0,0 +1,57 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ ++ ++extern int data; ++ ++void __RTL (startwith ("ira")) foo (void *ptr) ++{ ++ (function "foo" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} ++ ++void __RTL (startwith ("ira")) bar (void *ptr) ++{ ++ (function "bar" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} +-- +2.43.5 +
View file
_service
Changed
@@ -2,7 +2,7 @@ <service name="tar_scm"> <param name="scm">git</param> <param name="url">git@gitee.com:src-openeuler/gcc.git</param> - <param name="revision">openEuler-24.03-LTS-Next</param> + <param name="revision">openEuler-24.03-LTS-SP1</param> <param name="exclude">*</param> <param name="extract">*</param> </service>
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2