Changes of Revision 15 (package llvm, project openEuler:24.03)
_service:tar_scm:llvm.spec (changed)
@@ -38,7 +38,7 @@
 Name: %{pkg_name}
 Version: %{maj_ver}.%{min_ver}.%{patch_ver}
-Release: 8
+Release: 10
 Summary: The Low Level Virtual Machine
 License: NCSA
@@ -65,8 +65,10 @@
 Patch14: 0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch
 Patch15: 0015-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch
 Patch16: 0016-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch
-Patch17: 0017-Add-the-support-for-classic-flang.patch
-Patch18: 0018-Fix-declaration-definition-mismatch-for-classic-flang.patch
+Patch17: 0017-Add-the-support-for-classic-flang.patch
+Patch18: 0018-Fix-declaration-definition-mismatch-for-classic-flang.patch
+Patch19: 0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch
+Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch
 BuildRequires: binutils-devel
 BuildRequires: cmake
@@ -351,6 +353,12 @@
 %{install_includedir}/llvm-gmock
 %changelog
+* Mon Apr 29 2024 wangqiang <wangqiang1@kylinos.cn> - 17.0.6-10
+- Update llvm-lit config to support macro `build_for_openeuler`
+
+* Sun Apr 21 2024 zhanglimin <zhanglimin@loongson.cn> - 17.0.6-9
+- Improve the support for atomic and __clear_cache
+
 * Wed Apr 17 2024 luofeng <luofeng13@huawei.com> - 17.0.6-8
 - Add the support for classic flang
@@ -375,7 +383,7 @@
 * Thu Nov 30 2023 zhoujing <zhoujing106@huawei.com> - 17.0.6-1
 - Update to 17.0.6
-* Tue Jul 13 2023 cf-zhao <zhaochuanfeng@huawei.com> -12.0.1-7
+* Thu Jul 13 2023 cf-zhao <zhaochuanfeng@huawei.com> -12.0.1-7
 - Disable check.
 * Sat Jul 08 2023 cf-zhao <zhaochuanfeng@huawei.com> -12.0.1-6
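The 17.0.6-9 entry above backports LoongArch improvements for atomic operations and `__clear_cache`. As a rough, hypothetical illustration (this code is not part of the package; the function and variable names are invented), the C11 snippet below shows the kind of source-level compare-exchange whose LoongArch lowering those backports change:

```c
#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical example: a compare-exchange with acquire ordering on
 * success and relaxed (monotonic) ordering on failure. On LoongArch,
 * operations like this lower to an ll.w/sc.w retry loop whose trailing
 * "dbar" barrier the backported patch adjusts per ordering. */
static _Atomic int counter;

int bump_if_even(void) {
  int old = atomic_load_explicit(&counter, memory_order_relaxed);
  while ((old & 1) == 0) {
    if (atomic_compare_exchange_weak_explicit(
            &counter, &old, old + 1,
            memory_order_acquire,   /* success ordering */
            memory_order_relaxed))  /* failure ordering */
      return 1; /* exchanged */
  }
  return 0; /* counter was odd */
}

int main(void) {
  printf("exchanged=%d counter=%d\n", bump_if_even(),
         atomic_load_explicit(&counter, memory_order_relaxed));
  return 0;
}
```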
_service:tar_scm:0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch (added)
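This patch file bundles several backported upstream commits. The first commit threads the requested memory ordering through to the compare-exchange pseudo-instruction expansion so that the trailing barrier can depend on it: per the TableGen comment in the patch below, the immediates 2, 4, 5, 6 and 7 mirror LLVM's `llvm::AtomicOrdering` enumeration. The C sketch below restates that numbering for orientation; it is a paraphrase of `AtomicOrdering.h` as of LLVM 17, not code from the patch, so verify against the header.

```c
/* Sketch of the llvm::AtomicOrdering numbering that the new
 * PseudoCmpXchgPat patterns encode as an immediate operand
 * (paraphrased from LLVM's AtomicOrdering.h; verify before relying on it). */
enum atomic_ordering {
  AO_NOT_ATOMIC = 0,
  AO_UNORDERED  = 1,
  AO_MONOTONIC  = 2, /* "_monotonic" pattern -> immediate 2 */
  AO_CONSUME    = 3, /* not selected by these patterns */
  AO_ACQUIRE    = 4, /* "_acquire" -> 4 */
  AO_RELEASE    = 5, /* "_release" -> 5 */
  AO_ACQ_REL    = 6, /* "_acq_rel" -> 6 */
  AO_SEQ_CST    = 7  /* "_seq_cst" -> 7 */
};
```

The expansion pass then picks the failure-path barrier from this operand: orderings that include acquire get `dbar 0`, and everything else keeps the weaker `dbar 0x700` (1792), which is exactly the change visible in the updated FileCheck lines of the patch that follows.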
@@ -0,0 +1,12426 @@ +From ad367d826e5959792ce7384be62ba1ccffbf0d9a Mon Sep 17 00:00:00 2001 +From: hev <wangrui@loongson.cn> +Date: Wed, 11 Oct 2023 10:24:18 +0800 +Subject: PATCH 1/7 LoongArch Improve codegen for atomic ops (#67391) + +This PR improves memory barriers generated by atomic operations. + +Memory barrier semantics of LL/SC: +``` +LL: <memory-barrier> + <load-exclusive> +SC: <store-conditional> + <memory-barrier> +``` + +Changes: +* Remove unnecessary memory barriers before LL and between LL/SC. +* Fix acquire semantics. (If the SC instruction is not executed, then +the guarantee of acquiring semantics cannot be ensured. Therefore, an +acquire barrier needs to be generated when memory ordering includes an +acquire operation.) + +(cherry picked from commit 203ba238e33c570dba6cbcf247f1668bb2a13c26) +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 50 +-- + .../Target/LoongArch/LoongArchInstrInfo.td | 24 +- + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 24 +- + .../ir-instruction/atomic-cmpxchg.ll | 376 ++++++++++++++++-- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 24 +- + .../ir-instruction/atomicrmw-minmax.ll | 24 -- + .../LoongArch/ir-instruction/atomicrmw.ll | 31 -- + llvm/unittests/Target/LoongArch/InstSizes.cpp | 2 +- + 8 files changed, 407 insertions(+), 148 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index 51df0463e235..eb78ef065b21 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); +- AtomicOrdering Ordering = +- static_cast<AtomicOrdering>(MI.getOperand(4).getImm()); + + // .loop: +- // if(Ordering != AtomicOrdering::Monotonic) +- // dbar 0 + // ll.w|d dest, (addr) + // binop scratch, dest, val + // sc.w|d scratch, scratch, (addr) + // beqz scratch, loop +- if (Ordering != AtomicOrdering::Monotonic) +- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? 
LoongArch::LL_W : LoongArch::LL_D), DestReg) + .addReg(AddrReg) +@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion( + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + Register MaskReg = MI.getOperand(4).getReg(); +- AtomicOrdering Ordering = +- static_cast<AtomicOrdering>(MI.getOperand(5).getImm()); + + // .loop: +- // if(Ordering != AtomicOrdering::Monotonic) +- // dbar 0 + // ll.w destreg, (alignedaddr) + // binop scratch, destreg, incr + // xor scratch, destreg, scratch +@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion( + // xor scratch, destreg, scratch + // sc.w scratch, scratch, (alignedaddr) + // beqz scratch, loop +- if (Ordering != AtomicOrdering::Monotonic) +- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); +@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); +- auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); +- MF->insert(++LoopTailMBB->getIterator(), TailMBB); +- MF->insert(++TailMBB->getIterator(), DoneMBB); ++ MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); +- LoopTailMBB->addSuccessor(TailMBB); +- TailMBB->addSuccessor(DoneMBB); ++ LoopTailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); +@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + + // + // .loophead: +- // dbar 0 + // ll.w destreg, (alignedaddr) + // and scratch2, destreg, mask + // move scratch1, destreg +- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); +@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + // .looptail: + // sc.w scratch1, scratch1, (addr) + // beqz scratch1, loop +- // dbar 0x700 + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg) + .addReg(Scratch1Reg) + .addReg(AddrReg) +@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + .addReg(Scratch1Reg) + .addMBB(LoopHeadMBB); + +- // .tail: +- // dbar 0x700 +- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); +- + NextMBBI = MBB.end(); + MI.eraseFromParent(); + +@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); +- computeAndAddLiveIns(LiveRegs, *TailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + .addReg(CmpValReg) + .addMBB(TailMBB); + // .looptail: 
+- // dbar 0 + // move scratch, newval + // sc.w|d scratch, scratch, (addr) + // beqz scratch, loophead + // b done +- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(NewValReg) + .addReg(LoongArch::R0); +@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + .addMBB(TailMBB); + + // .looptail: +- // dbar 0 + // andn scratch, dest, mask + // or scratch, scratch, newval + // sc.w|d scratch, scratch, (addr) + // beqz scratch, loophead + // b done +- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg) + .addReg(DestReg) + .addReg(MaskReg); +@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + ++ AtomicOrdering Ordering = ++ static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm()); ++ int hint; ++ ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // TODO: acquire ++ hint = 0; ++ break; ++ default: ++ hint = 0x700; ++ } ++ + // .tail: +- // dbar 0x700 +- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); ++ // dbar 0x700 | acquire ++ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 05ae36a9781d..a9b0db30c2f6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1731,7 +1731,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + + class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), +- (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> { ++ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1821,14 +1821,28 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64, + def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64, + PseudoMaskedAtomicLoadUMin32>; + +-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new), +- (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>; ++// Ordering constants must be kept in sync with the AtomicOrdering enum in ++// AtomicOrdering.h. 
++multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst, ++ ValueType vt = GRLenVT> { ++ def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; ++ def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; ++ def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; ++ def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; ++ def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), ++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; ++} ++ ++defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; ++defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; + def : Pat<(int_loongarch_masked_cmpxchg_i64 + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), + (PseudoMaskedCmpXchg32 + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; +-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new), +- (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>; + + def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64, + PseudoMaskedAtomicLoadMax32>; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index f11af8fe6528..32106886c783 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -34,14 +34,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: bne $a5, $a3, .LBB0_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB0_3 + ; LA64-NEXT: b .LBB0_6 + ; LA64-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -88,14 +87,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bne $a5, $a3, .LBB1_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB1_3 + ; LA64-NEXT: b .LBB1_6 + ; LA64-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -129,14 +127,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a1, $a3, .LBB2_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a6, $a5 + ; LA64-NEXT: sc.w $a6, $a0, 0 + ; LA64-NEXT: beqz $a6, .LBB2_3 + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: move $a3, $a1 +@@ -168,14 +165,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: bne $a2, $a3, 
.LBB3_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a5, $a4 + ; LA64-NEXT: sc.d $a5, $a0, 0 + ; LA64-NEXT: beqz $a5, .LBB3_3 + ; LA64-NEXT: b .LBB3_6 + ; LA64-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB3_1 +@@ -224,14 +220,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: bne $a6, $a3, .LBB4_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $t0, $a7 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB4_3 + ; LA64-NEXT: b .LBB4_6 + ; LA64-NEXT: .LBB4_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -283,14 +278,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bne $a6, $a3, .LBB5_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $t0, $a7 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB5_3 + ; LA64-NEXT: b .LBB5_6 + ; LA64-NEXT: .LBB5_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -329,14 +323,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a2, $a4, .LBB6_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a0, 0 + ; LA64-NEXT: beqz $a7, .LBB6_3 + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 +@@ -373,14 +366,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: bne $a2, $a3, .LBB7_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a5, $a4 + ; LA64-NEXT: sc.d $a5, $a0, 0 + ; LA64-NEXT: beqz $a5, .LBB7_3 + ; LA64-NEXT: b .LBB7_6 + ; LA64-NEXT: .LBB7_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB7_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB7_1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 76e51fe7d3e8..1ac20d10e587 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -21,14 +21,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: and $a5, $a4, $a0 + ; LA64-NEXT: bne $a5, $a1, .LBB0_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or 
$a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: b .LBB0_4 + ; LA64-NEXT: .LBB0_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB0_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -56,14 +55,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + ; LA64-NEXT: and $a5, $a4, $a0 + ; LA64-NEXT: bne $a5, $a1, .LBB1_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: b .LBB1_4 + ; LA64-NEXT: .LBB1_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB1_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -77,13 +75,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB2_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB2_1 + ; LA64-NEXT: b .LBB2_4 + ; LA64-NEXT: .LBB2_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB2_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -97,13 +94,12 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB3_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB3_1 + ; LA64-NEXT: b .LBB3_4 + ; LA64-NEXT: .LBB3_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB3_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -130,14 +126,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: and $a6, $a5, $a4 + ; LA64-NEXT: bne $a6, $a1, .LBB4_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -167,14 +162,13 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: and $a6, $a5, $a4 + ; LA64-NEXT: bne $a6, $a1, .LBB5_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -190,13 +184,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB6_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: move $a0, $a3 + 
; LA64-NEXT: ret +@@ -212,13 +205,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB7_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -247,14 +239,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: and $a6, $a5, $a2 + ; LA64-NEXT: bne $a6, $a1, .LBB8_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: b .LBB8_4 + ; LA64-NEXT: .LBB8_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -287,14 +278,13 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: and $a6, $a5, $a2 + ; LA64-NEXT: bne $a6, $a1, .LBB9_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: b .LBB9_4 + ; LA64-NEXT: .LBB9_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -313,13 +303,12 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB10_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB10_1 + ; LA64-NEXT: b .LBB10_4 + ; LA64-NEXT: .LBB10_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 +@@ -337,13 +326,12 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB11_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB11_1 + ; LA64-NEXT: b .LBB11_4 + ; LA64-NEXT: .LBB11_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -352,3 +340,343 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res + } ++ ++define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: 
ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB12_1 ++; LA64-NEXT: b .LBB12_4 ++; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB12_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB13_1 ++; LA64-NEXT: b .LBB13_4 ++; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB13_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB14_1 ++; LA64-NEXT: b .LBB14_4 ++; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB14_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB15_1 ++; LA64-NEXT: b .LBB15_4 ++; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB15_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB16_1: # 
=>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a4 ++; LA64-NEXT: bne $a6, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a4 ++; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB16_1 ++; LA64-NEXT: b .LBB16_4 ++; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB16_4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a4 ++; LA64-NEXT: bne $a6, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a4 ++; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB17_1 ++; LA64-NEXT: b .LBB17_4 ++; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB17_4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB18_1 ++; LA64-NEXT: b .LBB18_4 ++; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB18_4: ++; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB19_1 ++; LA64-NEXT: b .LBB19_4 ++; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB19_4: ++; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; 
LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a0, $a2, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: addi.w $a2, $a4, 0 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a2 ++; LA64-NEXT: bne $a6, $a1, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a2 ++; LA64-NEXT: or $a6, $a6, $a0 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB20_1 ++; LA64-NEXT: b .LBB20_4 ++; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB20_4: ++; LA64-NEXT: and $a0, $a5, $a4 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: xor $a0, $a1, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a0, $a2, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: addi.w $a2, $a4, 0 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a2 ++; LA64-NEXT: bne $a6, $a1, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a2 ++; LA64-NEXT: or $a6, $a6, $a0 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB21_1 ++; LA64-NEXT: b .LBB21_4 ++; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB21_4: ++; LA64-NEXT: and $a0, $a5, $a4 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: xor $a0, $a1, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB22_1 ++; LA64-NEXT: b .LBB22_4 ++; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB22_4: ++; LA64-NEXT: addi.w $a0, $a1, 0 ++; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne 
$a3, $a1, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB23_1 ++; LA64-NEXT: b .LBB23_4 ++; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB23_4: ++; LA64-NEXT: xor $a0, $a3, $a1 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 9767717395b6..9a29d67e9982 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -25,14 +25,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB0_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB0_3 + ; LA64F-NEXT: b .LBB0_6 + ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -61,14 +60,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB0_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB0_3 + ; LA64D-NEXT: b .LBB0_6 + ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -101,14 +99,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB1_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB1_3 + ; LA64F-NEXT: b .LBB1_6 + ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -137,14 +134,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB1_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB1_3 + ; LA64D-NEXT: b .LBB1_6 + ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -178,14 +174,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB2_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w 
$a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB2_3 + ; LA64F-NEXT: b .LBB2_6 + ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -215,14 +210,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB2_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB2_3 + ; LA64D-NEXT: b .LBB2_6 + ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -256,14 +250,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB3_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB3_3 + ; LA64F-NEXT: b .LBB3_6 + ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -293,14 +286,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB3_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB3_3 + ; LA64D-NEXT: b .LBB3_6 + ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +index cd4a9e7fa9c4..26ba77e8d4fd 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +@@ -17,7 +17,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -30,8 +29,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b acquire +@@ -52,7 +49,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -65,8 +61,6 @@ define i16 
@atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b acquire +@@ -106,7 +100,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -119,8 +112,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB4_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b acquire +@@ -141,7 +132,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -154,8 +144,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB5_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b acquire +@@ -197,7 +185,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a4, $a0, 24 + ; LA64-NEXT: xori $a4, $a4, 56 + ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a3 + ; LA64-NEXT: move $a6, $a5 +@@ -212,8 +199,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b acquire +@@ -237,7 +222,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a4 + ; LA64-NEXT: move $a6, $a5 +@@ -252,8 +236,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b acquire +@@ -295,7 +277,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a4, $a0, 24 + ; LA64-NEXT: xori $a4, $a4, 56 + ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a3 + ; LA64-NEXT: move $a6, $a5 +@@ -310,8 +291,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB12_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b acquire +@@ -335,7 
+314,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a4 + ; LA64-NEXT: move $a6, $a5 +@@ -350,8 +328,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB13_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b acquire +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +index c077d14f728f..626276ba05f7 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +@@ -13,7 +13,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -37,7 +36,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -64,7 +62,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -89,7 +86,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -108,7 +104,6 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: move $a3, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -157,7 +152,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -181,7 +175,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -208,7 +201,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; 
LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -233,7 +225,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -252,7 +243,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_add_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: add.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -301,7 +291,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -325,7 +314,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -352,7 +340,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -377,7 +364,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -396,7 +382,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_sub_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: sub.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -447,7 +432,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -472,7 +456,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -500,7 +483,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -526,7 +508,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; 
LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -546,7 +527,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_nand_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: nor $a3, $a3, $zero +@@ -559,7 +539,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA64-LABEL: atomicrmw_nand_i32_acquire: + ; LA64: # %bb.0: + ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero +@@ -586,7 +565,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { + ; LA64-LABEL: atomicrmw_nand_i64_acquire: + ; LA64: # %bb.0: + ; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.d $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero +@@ -611,7 +589,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: addi.w $a3, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a3 + ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a0, 0 + ; LA32-NEXT: and $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +@@ -650,7 +627,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: addi.w $a2, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a2 + ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a4, $a2, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +@@ -681,7 +657,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_and_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -728,7 +703,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -760,7 +734,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -787,7 +760,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_or_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: or $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -834,7 +806,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -866,7 +837,6 
@@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -893,7 +863,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xor_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: xor $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp +index 1a5d4369c48b..3180c7237a79 100644 +--- a/llvm/unittests/Target/LoongArch/InstSizes.cpp ++++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp +@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) { + " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n" + " early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n" +- " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n" ++ " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n", + // clang-format on + (LoongArchInstrInfo &II, MachineFunction &MF) { +-- +2.20.1 + + +From 5f2a6174965bccaeefdeb410cf67ea0cb378b26c Mon Sep 17 00:00:00 2001 +From: hev <wangrui@loongson.cn> +Date: Wed, 11 Oct 2023 18:28:04 +0800 +Subject: PATCH 2/7 LoongArch Add some atomic tests (#68766) + +(cherry picked from commit 37b93f07cd7ba2b1e6e81116cd49d34396b7b70a) +--- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 2714 ++++++++++++- + .../ir-instruction/atomicrmw-minmax.ll | 1400 +++++++ + .../LoongArch/ir-instruction/atomicrmw.ll | 3346 ++++++++++++++++- + .../ir-instruction/fence-singlethread.ll | 17 + + .../ir-instruction/load-store-atomic.ll | 196 + + 5 files changed, 7609 insertions(+), 64 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll + +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 9a29d67e9982..02d481cb3865 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -2,8 +2,6 @@ + ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F + ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +-;; Add more test cases after supporting different AtomicOrdering. 
+-
+ define float @float_fadd_acquire(ptr %p) nounwind {
+ ; LA64F-LABEL: float_fadd_acquire:
+ ; LA64F: # %bb.0:
+@@ -681,3 +679,2715 @@ define double @double_fmax_acquire(ptr %p) nounwind {
+ %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4
+ ret double %v
+ }
++
++define float @float_fadd_release(ptr %p) nounwind {
++; LA64F-LABEL: float_fadd_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB8_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB8_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB8_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB8_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB8_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB8_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB8_3
++; LA64F-NEXT: b .LBB8_6
++; LA64F-NEXT: .LBB8_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB8_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB8_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fadd_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB8_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB8_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB8_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB8_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB8_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB8_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB8_3
++; LA64D-NEXT: b .LBB8_6
++; LA64D-NEXT: .LBB8_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB8_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB8_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, float 1.0 release, align 4
++ ret float %v
++}
++
++define float @float_fsub_release(ptr %p) nounwind {
++; LA64F-LABEL: float_fsub_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0)
++; LA64F-NEXT: fld.s $fa1, $a1, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB9_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB9_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB9_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB9_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB9_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB9_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB9_3
++; LA64F-NEXT: b .LBB9_6
++; LA64F-NEXT: .LBB9_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB9_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB9_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fsub_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0)
++; LA64D-NEXT: fld.s $fa1, $a1, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB9_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB9_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB9_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB9_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB9_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB9_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB9_3
++; LA64D-NEXT: b .LBB9_6
++; LA64D-NEXT: .LBB9_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB9_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB9_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, float 1.0 release, align 4
++ ret float %v
++}
++
++define float @float_fmin_release(ptr %p) nounwind {
++; LA64F-LABEL: float_fmin_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB10_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB10_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB10_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB10_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB10_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB10_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB10_3
++; LA64F-NEXT: b .LBB10_6
++; LA64F-NEXT: .LBB10_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB10_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB10_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmin_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB10_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB10_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB10_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB10_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB10_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB10_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB10_3
++; LA64D-NEXT: b .LBB10_6
++; LA64D-NEXT: .LBB10_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB10_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB10_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, float 1.0 release, align 4
++ ret float %v
++}
++
++define float @float_fmax_release(ptr %p) nounwind {
++; LA64F-LABEL: float_fmax_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB11_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB11_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB11_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB11_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB11_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB11_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB11_3
++; LA64F-NEXT: b .LBB11_6
++; LA64F-NEXT: .LBB11_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB11_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB11_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmax_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB11_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB11_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB11_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB11_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB11_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB11_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB11_3
++; LA64D-NEXT: b .LBB11_6
++; LA64D-NEXT: .LBB11_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB11_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB11_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, float 1.0 release, align 4
++ ret float %v
++}
++
++define double @double_fadd_release(ptr %p) nounwind {
++; LA64F-LABEL: double_fadd_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 16
++; LA64F-NEXT: addi.d $s3, $sp, 8
++; LA64F-NEXT: ori $s4, $zero, 3
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB12_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 16
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 16
++; LA64F-NEXT: beqz $a1, .LBB12_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fadd_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 16
++; LA64D-NEXT: addi.d $s2, $sp, 8
++; LA64D-NEXT: ori $s3, $zero, 3
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB12_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 16
++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 16
++; LA64D-NEXT: beqz $a0, .LBB12_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, double 1.0 release, align 4
++ ret double %v
++}
++
++define double @double_fsub_release(ptr %p) nounwind {
++; LA64F-LABEL: double_fsub_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, -1025
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 16
++; LA64F-NEXT: addi.d $s3, $sp, 8
++; LA64F-NEXT: ori $s4, $zero, 3
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB13_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 16
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 16
++; LA64F-NEXT: beqz $a1, .LBB13_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fsub_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0)
++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI13_0)
++; LA64D-NEXT: fld.d $fs0, $a0, 0
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 16
++; LA64D-NEXT: addi.d $s2, $sp, 8
++; LA64D-NEXT: ori $s3, $zero, 3
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB13_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 16
++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 16
++; LA64D-NEXT: beqz $a0, .LBB13_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, double 1.0 release, align 4
++ ret double %v
++}
++
++define double @double_fmin_release(ptr %p) nounwind {
++; LA64F-LABEL: double_fmin_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 16
++; LA64F-NEXT: addi.d $s3, $sp, 8
++; LA64F-NEXT: ori $s4, $zero, 3
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB14_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 16
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(fmin)
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 16
++; LA64F-NEXT: beqz $a1, .LBB14_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmin_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 16
++; LA64D-NEXT: addi.d $s2, $sp, 8
++; LA64D-NEXT: ori $s3, $zero, 3
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB14_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 16
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 16
++; LA64D-NEXT: beqz $a0, .LBB14_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, double 1.0 release, align 4
++ ret double %v
++}
++
++define double @double_fmax_release(ptr %p) nounwind {
++; LA64F-LABEL: double_fmax_release:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 16
++; LA64F-NEXT: addi.d $s3, $sp, 8
++; LA64F-NEXT: ori $s4, $zero, 3
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB15_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 16
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(fmax)
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 16
++; LA64F-NEXT: beqz $a1, .LBB15_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmax_release:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 16
++; LA64D-NEXT: addi.d $s2, $sp, 8
++; LA64D-NEXT: ori $s3, $zero, 3
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB15_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 16
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 16
++; LA64D-NEXT: beqz $a0, .LBB15_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, double 1.0 release, align 4
++ ret double %v
++}
++
++define float @float_fadd_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: float_fadd_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB16_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB16_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB16_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB16_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB16_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB16_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB16_3
++; LA64F-NEXT: b .LBB16_6
++; LA64F-NEXT: .LBB16_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB16_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB16_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fadd_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB16_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB16_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB16_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB16_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB16_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB16_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB16_3
++; LA64D-NEXT: b .LBB16_6
++; LA64D-NEXT: .LBB16_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB16_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB16_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4
++ ret float %v
++}
++
++define float @float_fsub_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: float_fsub_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0)
++; LA64F-NEXT: fld.s $fa1, $a1, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB17_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB17_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB17_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB17_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB17_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB17_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB17_3
++; LA64F-NEXT: b .LBB17_6
++; LA64F-NEXT: .LBB17_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB17_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB17_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fsub_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0)
++; LA64D-NEXT: fld.s $fa1, $a1, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB17_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB17_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB17_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB17_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB17_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB17_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB17_3
++; LA64D-NEXT: b .LBB17_6
++; LA64D-NEXT: .LBB17_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB17_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB17_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4
++ ret float %v
++}
++
++define float @float_fmin_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: float_fmin_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB18_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB18_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB18_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB18_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB18_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB18_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB18_3
++; LA64F-NEXT: b .LBB18_6
++; LA64F-NEXT: .LBB18_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB18_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB18_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmin_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB18_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB18_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB18_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB18_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB18_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB18_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB18_3
++; LA64D-NEXT: b .LBB18_6
++; LA64D-NEXT: .LBB18_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB18_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB18_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4
++ ret float %v
++}
++
++define float @float_fmax_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: float_fmax_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB19_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB19_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB19_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB19_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB19_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB19_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB19_3
++; LA64F-NEXT: b .LBB19_6
++; LA64F-NEXT: .LBB19_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB19_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB19_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmax_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB19_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB19_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB19_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB19_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB19_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB19_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB19_3
++; LA64D-NEXT: b .LBB19_6
++; LA64D-NEXT: .LBB19_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB19_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB19_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4
++ ret float %v
++}
++
++define double @double_fadd_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: double_fadd_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: ori $s4, $zero, 4
++; LA64F-NEXT: ori $s5, $zero, 2
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB20_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $s5
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB20_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fadd_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: ori $s3, $zero, 4
++; LA64D-NEXT: ori $s4, $zero, 2
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB20_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $s4
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB20_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4
++ ret double %v
++}
++
++define double @double_fsub_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: double_fsub_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, -1025
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: ori $s4, $zero, 4
++; LA64F-NEXT: ori $s5, $zero, 2
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB21_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $s5
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB21_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fsub_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0)
++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0)
++; LA64D-NEXT: fld.d $fs0, $a0, 0
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: ori $s3, $zero, 4
++; LA64D-NEXT: ori $s4, $zero, 2
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB21_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $s4
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB21_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4
++ ret double %v
++}
++
++define double @double_fmin_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: double_fmin_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: ori $s4, $zero, 4
++; LA64F-NEXT: ori $s5, $zero, 2
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB22_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(fmin)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $s5
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB22_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmin_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: ori $s3, $zero, 4
++; LA64D-NEXT: ori $s4, $zero, 2
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB22_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $s4
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB22_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4
++ ret double %v
++}
++
++define double @double_fmax_acq_rel(ptr %p) nounwind {
++; LA64F-LABEL: double_fmax_acq_rel:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: ori $s4, $zero, 4
++; LA64F-NEXT: ori $s5, $zero, 2
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB23_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(fmax)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $s5
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB23_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmax_acq_rel:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: ori $s3, $zero, 4
++; LA64D-NEXT: ori $s4, $zero, 2
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB23_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $s4
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB23_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4
++ ret double %v
++}
++
++define float @float_fadd_seq_cst(ptr %p) nounwind {
++; LA64F-LABEL: float_fadd_seq_cst:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB24_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB24_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB24_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB24_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB24_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB24_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB24_3
++; LA64F-NEXT: b .LBB24_6
++; LA64F-NEXT: .LBB24_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB24_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB24_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fadd_seq_cst:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB24_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB24_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB24_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB24_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB24_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB24_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB24_3
++; LA64D-NEXT: b .LBB24_6
++; LA64D-NEXT: .LBB24_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB24_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB24_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4
++ ret float %v
++}
++
++define float @float_fsub_seq_cst(ptr %p) nounwind {
++; LA64F-LABEL: float_fsub_seq_cst:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0)
++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0)
++; LA64F-NEXT: fld.s $fa1, $a1, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB25_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB25_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB25_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB25_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB25_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB25_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB25_3
++; LA64F-NEXT: b .LBB25_6
++; LA64F-NEXT: .LBB25_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB25_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB25_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fsub_seq_cst:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0)
++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0)
++; LA64D-NEXT: fld.s $fa1, $a1, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB25_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB25_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB25_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB25_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB25_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB25_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB25_3
++; LA64D-NEXT: b .LBB25_6
++; LA64D-NEXT: .LBB25_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB25_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB25_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4
++ ret float %v
++}
++
++define float @float_fmin_seq_cst(ptr %p) nounwind {
++; LA64F-LABEL: float_fmin_seq_cst:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB26_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB26_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB26_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB26_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB26_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB26_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB26_3
++; LA64F-NEXT: b .LBB26_6
++; LA64F-NEXT: .LBB26_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB26_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB26_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmin_seq_cst:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB26_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB26_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB26_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB26_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB26_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB26_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB26_3
++; LA64D-NEXT: b .LBB26_6
++; LA64D-NEXT: .LBB26_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB26_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB26_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4
++ ret float %v
++}
++
++define float @float_fmax_seq_cst(ptr %p) nounwind {
++; LA64F-LABEL: float_fmax_seq_cst:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB27_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB27_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB27_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB27_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB27_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB27_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB27_3
++; LA64F-NEXT: b .LBB27_6
++; LA64F-NEXT: .LBB27_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1
++; LA64F-NEXT: dbar 0
++; LA64F-NEXT: .LBB27_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB27_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmax_seq_cst:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB27_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB27_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB27_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB27_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB27_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB27_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB27_3
++; LA64D-NEXT: b .LBB27_6
++; LA64D-NEXT: .LBB27_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1
++; LA64D-NEXT: dbar 0
++; LA64D-NEXT: .LBB27_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB27_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4
++ ret float %v
++}
++
++define double @double_fadd_seq_cst(ptr %p) nounwind {
++; LA64F-LABEL: double_fadd_seq_cst:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -80
++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 16
++; LA64F-NEXT: addi.d $s3, $sp, 8
++; LA64F-NEXT: ori $s4, $zero, 5
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB28_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 16
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $s4
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 16
++; LA64F-NEXT: beqz $a1, .LBB28_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d
$sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fadd_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB28_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB28_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fsub_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, -1025 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB29_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s4 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB29_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded 
Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fsub_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0) ++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI29_0) ++; LA64D-NEXT: fld.d $fs0, $a0, 0 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB29_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB29_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fmin_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB30_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmin) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s4 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, 
$a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB30_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmin_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB30_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB30_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fmax_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB31_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmax) 
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $s4
++; LA64F-NEXT: move $a5, $s4
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 16
++; LA64F-NEXT: beqz $a1, .LBB31_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 80
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmax_seq_cst:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -80
++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 16
++; LA64D-NEXT: addi.d $s2, $sp, 8
++; LA64D-NEXT: ori $s3, $zero, 5
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB31_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 16
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $s3
++; LA64D-NEXT: move $a5, $s3
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 16
++; LA64D-NEXT: beqz $a0, .LBB31_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 80
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4
++ ret double %v
++}
++
++define float @float_fadd_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: float_fadd_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB32_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB32_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB32_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB32_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB32_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB32_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB32_3
++; LA64F-NEXT: b .LBB32_6
++; LA64F-NEXT: .LBB32_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB32_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB32_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fadd_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB32_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB32_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB32_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB32_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB32_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB32_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB32_3
++; LA64D-NEXT: b .LBB32_6
++; LA64D-NEXT: .LBB32_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB32_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB32_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4
++ ret float %v
++}
++
++define float @float_fsub_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: float_fsub_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0)
++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0)
++; LA64F-NEXT: fld.s $fa1, $a1, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB33_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB33_3 Depth 2
++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB33_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB33_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB33_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB33_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB33_3
++; LA64F-NEXT: b .LBB33_6
++; LA64F-NEXT: .LBB33_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB33_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB33_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fsub_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0)
++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0)
++; LA64D-NEXT: fld.s $fa1, $a1, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB33_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB33_3 Depth 2
++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB33_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB33_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB33_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB33_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB33_3
++; LA64D-NEXT: b .LBB33_6
++; LA64D-NEXT: .LBB33_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB33_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB33_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4
++ ret float %v
++}
++
++define float @float_fmin_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: float_fmin_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB34_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB34_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB34_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB34_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB34_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB34_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB34_3
++; LA64F-NEXT: b .LBB34_6
++; LA64F-NEXT: .LBB34_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB34_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB34_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmin_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB34_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB34_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB34_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB34_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB34_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB34_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB34_3
++; LA64D-NEXT: b .LBB34_6
++; LA64D-NEXT: .LBB34_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB34_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB34_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4
++ ret float %v
++}
++
++define float @float_fmax_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: float_fmax_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: fld.s $fa0, $a0, 0
++; LA64F-NEXT: addi.w $a1, $zero, 1
++; LA64F-NEXT: movgr2fr.w $fa1, $a1
++; LA64F-NEXT: ffint.s.w $fa1, $fa1
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB35_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Loop Header: Depth=1
++; LA64F-NEXT: # Child Loop BB35_3 Depth 2
++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64F-NEXT: movfr2gr.s $a1, $fa2
++; LA64F-NEXT: movfr2gr.s $a2, $fa0
++; LA64F-NEXT: .LBB35_3: # %atomicrmw.start
++; LA64F-NEXT: # Parent Loop BB35_1 Depth=1
++; LA64F-NEXT: # => This Inner Loop Header: Depth=2
++; LA64F-NEXT: ll.w $a3, $a0, 0
++; LA64F-NEXT: bne $a3, $a2, .LBB35_5
++; LA64F-NEXT: # %bb.4: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB35_3 Depth=2
++; LA64F-NEXT: move $a4, $a1
++; LA64F-NEXT: sc.w $a4, $a0, 0
++; LA64F-NEXT: beqz $a4, .LBB35_3
++; LA64F-NEXT: b .LBB35_6
++; LA64F-NEXT: .LBB35_5: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1
++; LA64F-NEXT: dbar 1792
++; LA64F-NEXT: .LBB35_6: # %atomicrmw.start
++; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1
++; LA64F-NEXT: movgr2fr.w $fa0, $a3
++; LA64F-NEXT: addi.w $a1, $a2, 0
++; LA64F-NEXT: bne $a3, $a1, .LBB35_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: float_fmax_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: fld.s $fa0, $a0, 0
++; LA64D-NEXT: addi.w $a1, $zero, 1
++; LA64D-NEXT: movgr2fr.w $fa1, $a1
++; LA64D-NEXT: ffint.s.w $fa1, $fa1
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB35_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Loop Header: Depth=1
++; LA64D-NEXT: # Child Loop BB35_3 Depth 2
++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0
++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1
++; LA64D-NEXT: movfr2gr.s $a1, $fa2
++; LA64D-NEXT: movfr2gr.s $a2, $fa0
++; LA64D-NEXT: .LBB35_3: # %atomicrmw.start
++; LA64D-NEXT: # Parent Loop BB35_1 Depth=1
++; LA64D-NEXT: # => This Inner Loop Header: Depth=2
++; LA64D-NEXT: ll.w $a3, $a0, 0
++; LA64D-NEXT: bne $a3, $a2, .LBB35_5
++; LA64D-NEXT: # %bb.4: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB35_3 Depth=2
++; LA64D-NEXT: move $a4, $a1
++; LA64D-NEXT: sc.w $a4, $a0, 0
++; LA64D-NEXT: beqz $a4, .LBB35_3
++; LA64D-NEXT: b .LBB35_6
++; LA64D-NEXT: .LBB35_5: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1
++; LA64D-NEXT: dbar 1792
++; LA64D-NEXT: .LBB35_6: # %atomicrmw.start
++; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1
++; LA64D-NEXT: movgr2fr.w $fa0, $a3
++; LA64D-NEXT: addi.w $a1, $a2, 0
++; LA64D-NEXT: bne $a3, $a1, .LBB35_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4
++ ret float %v
++}
++
++define double @double_fadd_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: double_fadd_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -64
++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB36_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $zero
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB36_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 64
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fadd_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -64
++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB36_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $zero
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB36_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 64
++; LA64D-NEXT: ret
++ %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4
++ ret double %v
++}
++
++define double @double_fsub_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: double_fsub_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -64
++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, -1025
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB37_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(__adddf3)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $zero
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB37_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 64
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fsub_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -64
++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0)
++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI37_0)
++; LA64D-NEXT: fld.d $fs0, $a0, 0
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB37_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $zero
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB37_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 64
++; LA64D-NEXT: ret
++ %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4
++ ret double %v
++}
++
++define double @double_fmin_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: double_fmin_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -64
++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB38_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(fmin)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $zero
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB38_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 64
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmin_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -64
++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB38_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $zero
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB38_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 64
++; LA64D-NEXT: ret
++ %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4
++ ret double %v
++}
++
++define double @double_fmax_monotonic(ptr %p) nounwind {
++; LA64F-LABEL: double_fmax_monotonic:
++; LA64F: # %bb.0:
++; LA64F-NEXT: addi.d $sp, $sp, -64
++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
++; LA64F-NEXT: move $fp, $a0
++; LA64F-NEXT: ld.d $a0, $a0, 0
++; LA64F-NEXT: lu52i.d $s0, $zero, 1023
++; LA64F-NEXT: ori $s1, $zero, 8
++; LA64F-NEXT: addi.d $s2, $sp, 8
++; LA64F-NEXT: addi.d $s3, $sp, 0
++; LA64F-NEXT: .p2align 4, , 16
++; LA64F-NEXT: .LBB39_1: # %atomicrmw.start
++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64F-NEXT: st.d $a0, $sp, 8
++; LA64F-NEXT: move $a1, $s0
++; LA64F-NEXT: bl %plt(fmax)
++; LA64F-NEXT: st.d $a0, $sp, 0
++; LA64F-NEXT: move $a0, $s1
++; LA64F-NEXT: move $a1, $fp
++; LA64F-NEXT: move $a2, $s2
++; LA64F-NEXT: move $a3, $s3
++; LA64F-NEXT: move $a4, $zero
++; LA64F-NEXT: move $a5, $zero
++; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64F-NEXT: move $a1, $a0
++; LA64F-NEXT: ld.d $a0, $sp, 8
++; LA64F-NEXT: beqz $a1, .LBB39_1
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end
++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64F-NEXT: addi.d $sp, $sp, 64
++; LA64F-NEXT: ret
++;
++; LA64D-LABEL: double_fmax_monotonic:
++; LA64D: # %bb.0:
++; LA64D-NEXT: addi.d $sp, $sp, -64
++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
++; LA64D-NEXT: move $fp, $a0
++; LA64D-NEXT: fld.d $fa0, $a0, 0
++; LA64D-NEXT: addi.d $a0, $zero, 1
++; LA64D-NEXT: movgr2fr.d $fa1, $a0
++; LA64D-NEXT: ffint.d.l $fs0, $fa1
++; LA64D-NEXT: ori $s0, $zero, 8
++; LA64D-NEXT: addi.d $s1, $sp, 8
++; LA64D-NEXT: addi.d $s2, $sp, 0
++; LA64D-NEXT: .p2align 4, , 16
++; LA64D-NEXT: .LBB39_1: # %atomicrmw.start
++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
++; LA64D-NEXT: fst.d $fa0, $sp, 8
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0
++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0
++; LA64D-NEXT: fst.d $fa0, $sp, 0
++; LA64D-NEXT: move $a0, $s0
++; LA64D-NEXT: move $a1, $fp
++; LA64D-NEXT: move $a2, $s1
++; LA64D-NEXT: move $a3, $s2
++; LA64D-NEXT: move $a4, $zero
++; LA64D-NEXT: move $a5, $zero
++; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
++; LA64D-NEXT: fld.d $fa0, $sp, 8
++; LA64D-NEXT: beqz $a0, .LBB39_1
++; LA64D-NEXT: # %bb.2: # %atomicrmw.end
++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
++; LA64D-NEXT: addi.d $sp, $sp, 64
++; LA64D-NEXT: ret
++ %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4
++ ret double %v
++}
+diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+index 26ba77e8d4fd..770358a05bfd 100644
+--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+@@ -353,3 +353,1403 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
+ %1 = atomicrmw min ptr %a, i64 %b acquire
+ ret i64 %1
+ }
++
++define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a6, $a1, .LBB16_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB16_3: # in Loop: Header=BB16_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB16_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a6, $a1, .LBB17_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB17_3: # in Loop: Header=BB17_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB17_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.du $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a1, $a6, .LBB20_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB20_3: # in Loop: Header=BB20_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB20_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a1, $a6, .LBB21_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB21_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammin_db.du $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: ext.w.b $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: andi $a4, $a0, 24
++; LA64-NEXT: xori $a4, $a4, 56
++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a3
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a4
++; LA64-NEXT: sra.w $a7, $a7, $a4
++; LA64-NEXT: bge $a7, $a1, .LBB24_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a3
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB24_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a5, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a3, $a0, 24
++; LA64-NEXT: ori $a4, $zero, 48
++; LA64-NEXT: sub.d $a3, $a4, $a3
++; LA64-NEXT: lu12i.w $a4, 15
++; LA64-NEXT: ori $a4, $a4, 4095
++; LA64-NEXT: sll.w $a4, $a4, $a0
++; LA64-NEXT: addi.w $a4, $a4, 0
++; LA64-NEXT: ext.w.h $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a4
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a3
++; LA64-NEXT: sra.w $a7, $a7, $a3
++; LA64-NEXT: bge $a7, $a1, .LBB25_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a4
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB25_3: # in Loop: Header=BB25_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB25_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a5, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_min_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: ext.w.b $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: andi $a4, $a0, 24
++; LA64-NEXT: xori $a4, $a4, 56
++; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a3
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a4
++; LA64-NEXT: sra.w $a7, $a7, $a4
++; LA64-NEXT: bge $a1, $a7, .LBB28_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a3
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB28_3: # in Loop: Header=BB28_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB28_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a5, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw min ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_min_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a3, $a0, 24
++; LA64-NEXT: ori $a4, $zero, 48
++; LA64-NEXT: sub.d $a3, $a4, $a3
++; LA64-NEXT: lu12i.w $a4, 15
++; LA64-NEXT: ori $a4, $a4, 4095
++; LA64-NEXT: sll.w $a4, $a4, $a0
++; LA64-NEXT: addi.w $a4, $a4, 0
++; LA64-NEXT: ext.w.h $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a4
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a3
++; LA64-NEXT: sra.w $a7, $a7, $a3
++; LA64-NEXT: bge $a1, $a7, .LBB29_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a4
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB29_3: # in Loop: Header=BB29_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB29_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a5, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw min ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_min_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammin_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw min ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_min_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: ammin_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw min ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a6, $a1, .LBB32_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB32_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a6, $a1, .LBB33_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB33_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_umax_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.du $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umax ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a1, $a6, .LBB36_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB36_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a6, $a4, $a3
++; LA64-NEXT: move $a5, $a4
++; LA64-NEXT: bgeu $a1, $a6, .LBB37_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1
++; LA64-NEXT: xor $a5, $a4, $a1
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB37_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_umin_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: ammin_db.du $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw umin ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: ext.w.b $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: andi $a4, $a0, 24
++; LA64-NEXT: xori $a4, $a4, 56
++; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a3
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a4
++; LA64-NEXT: sra.w $a7, $a7, $a4
++; LA64-NEXT: bge $a7, $a1, .LBB40_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a3
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB40_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a5, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a3, $a0, 24
++; LA64-NEXT: ori $a4, $zero, 48
++; LA64-NEXT: sub.d $a3, $a4, $a3
++; LA64-NEXT: lu12i.w $a4, 15
++; LA64-NEXT: ori $a4, $a4, 4095
++; LA64-NEXT: sll.w $a4, $a4, $a0
++; LA64-NEXT: addi.w $a4, $a4, 0
++; LA64-NEXT: ext.w.h $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a4
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a3
++; LA64-NEXT: sra.w $a7, $a7, $a3
++; LA64-NEXT: bge $a7, $a1, .LBB41_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a4
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB41_1
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: srl.w $a0, $a5, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA64-LABEL: atomicrmw_max_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: ammax_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw max ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA64-LABEL: atomicrmw_min_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: ext.w.b $a1, $a1
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: andi $a4, $a0, 24
++; LA64-NEXT: xori $a4, $a4, 56
++; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a5, $a2, 0
++; LA64-NEXT: and $a7, $a5, $a3
++; LA64-NEXT: move $a6, $a5
++; LA64-NEXT: sll.w $a7, $a7, $a4
++; LA64-NEXT: sra.w $a7, $a7, $a4
++; LA64-NEXT: bge $a1, $a7, .LBB44_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1
++; LA64-NEXT: xor $a6, $a5, $a1
++; LA64-NEXT: and $a6, $a6, $a3
++; LA64-NEXT: xor $a6, $a5, $a6
++; LA64-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1
++; LA64-NEXT: sc.w $a6, $a2, 0
++; LA64-NEXT: beqz $a6, .LBB44_1
++;
LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB45_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB45_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB48_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB48_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB49_1: # =>This Inner Loop Header: 
Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB49_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB49_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB52_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB52_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB53_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB53_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i32 %b 
seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a7, $a1, .LBB56_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB56_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a7, $a1, .LBB57_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB57_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 
++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a1, $a7, .LBB60_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB60_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB61_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB61_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB64_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: 
xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB64_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB65_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB65_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB65_3: # in Loop: Header=BB65_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB65_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB68_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB68_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB68_3: # in Loop: Header=BB68_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB68_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB69_1: # 
=>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB69_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB69_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB69_3: # in Loop: Header=BB69_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB69_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a7, $a1, .LBB72_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB72_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB72_3: # in Loop: Header=BB72_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB72_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a7, $a1, .LBB73_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB73_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB73_3: # in Loop: Header=BB73_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB73_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; 
LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a1, $a7, .LBB76_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB76_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB76_3: # in Loop: Header=BB76_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB76_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB77_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB77_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB77_3: # in Loop: Header=BB77_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB77_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: 
ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b monotonic ++ ret i64 %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +index 626276ba05f7..94a26e4ed9c7 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +@@ -900,6 +900,3228 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { + ret i64 %1 + } + ++define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB28_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB28_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB29_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB29_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 0 release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ++; 
LA32-LABEL: atomicrmw_xchg_minus_1_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB30_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB30_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 -1 release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB31_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB31_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 
++; LA32-NEXT: beqz $a4, .LBB32_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB32_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 0 release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_minus_1_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB33_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB33_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 -1 release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: move $a3, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB34_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_exchange_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i64 %b release ++ ret 
i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB36_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB36_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB37_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB37_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: add.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB38_1 ++; LA32-NEXT: # %bb.2: ++; 
LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_add_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB40_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB40_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB41_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d 
$a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB41_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: sub.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB42_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.w $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.w $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.d $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.d $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB44_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB44_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: 
atomicrmw_nand_i16_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: and $a5, $a4, $a1
++; LA32-NEXT: nor $a5, $a5, $zero
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB45_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a5, $a4, $a1
++; LA64-NEXT: nor $a5, $a5, $zero
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB45_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i32_release:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a3, $a2, $a1
++; LA32-NEXT: nor $a3, $a3, $zero
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB46_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a2, $a0, 0
++; LA64-NEXT: and $a3, $a2, $a1
++; LA64-NEXT: nor $a3, $a3, $zero
++; LA64-NEXT: sc.w $a3, $a0, 0
++; LA64-NEXT: beqz $a3, .LBB46_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i64_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 3
++; LA32-NEXT: bl %plt(__atomic_fetch_nand_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.d $a2, $a0, 0
++; LA64-NEXT: and $a3, $a2, $a1
++; LA64-NEXT: nor $a3, $a3, $zero
++; LA64-NEXT: sc.d $a3, $a0, 0
++; LA64-NEXT: beqz $a3, .LBB47_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i8_release:
++; LA32: # %bb.0:
++; LA32-NEXT: slli.w $a2, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a2
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a2
++; LA32-NEXT: orn $a1, $a1, $a3
++; LA32-NEXT: addi.w $a3, $zero, -4
++; LA32-NEXT: and $a0, $a0, $a3
++; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a0, 0
++; LA32-NEXT: and $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a0, 0
++; LA32-NEXT: beqz $a4, .LBB48_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.d $a2, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a2
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a2
++; LA64-NEXT: orn $a1, $a1, $a3
++; LA64-NEXT: addi.w $a3, $zero, -4
++; LA64-NEXT: and $a0, $a0, $a3
++; LA64-NEXT: amand_db.w $a3, $a1, $a0
++; LA64-NEXT: srl.w $a0, $a3, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i16_release:
++; LA32: # %bb.0:
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: slli.w $a3, $a0, 3
++; LA32-NEXT: sll.w $a2, $a2, $a3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a3
++; LA32-NEXT: orn $a1, $a1, $a2
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a0, $a0, $a2
++; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a4, $a2, $a1
++; LA32-NEXT: sc.w $a4, $a0, 0
++; LA32-NEXT: beqz $a4, .LBB49_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a2, $a3
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: slli.d $a3, $a0, 3
++; LA64-NEXT: sll.w $a2, $a2, $a3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a3
++; LA64-NEXT: orn $a1, $a1, $a2
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a0, $a0, $a2
++; LA64-NEXT: amand_db.w $a2, $a1, $a0
++; LA64-NEXT: srl.w $a0, $a2, $a3
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i32_release:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB50_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: amand_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i64_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 3
++; LA32-NEXT: bl %plt(__atomic_fetch_and_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: amand_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i8_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: or $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB52_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i16_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: or $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB53_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i32_release:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: or $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB54_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: amor_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i64_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 3
++; LA32-NEXT: bl %plt(__atomic_fetch_or_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: amor_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i8_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB56_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i8_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amxor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i8 %b release
++ ret i8 %1
++}
++
++define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i16_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB57_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i16_release:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amxor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i16 %b release
++ ret i16 %1
++}
++
++define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i32_release:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: xor $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB58_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i32_release:
++; LA64: # %bb.0:
++; LA64-NEXT: amxor_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i32 %b release
++ ret i32 %1
++}
++
++define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i64_release:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 3
++; LA32-NEXT: bl %plt(__atomic_fetch_xor_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i64_release:
++; LA64: # %bb.0:
++; LA64-NEXT: amxor_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i64 %b release
++ ret i64 %1
++}
++
++define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: addi.w $a5, $a1, 0
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB60_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: addi.w $a5, $a1, 0
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB60_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_0_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a2, $zero, 255
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $zero, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB61_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a2, $zero, 255
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $zero, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB61_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel
++ ret i8 %1
++}
++
++define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a2, $zero, 255
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB62_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a2, $zero, 255
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $a2, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB62_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: addi.w $a5, $a1, 0
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB63_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: addi.w $a5, $a1, 0
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB63_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_0_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $zero, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB64_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $zero, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB64_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel
++ ret i16 %1
++}
++
++define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB65_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $a2, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB65_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: move $a3, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB66_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amswap_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_exchange_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amswap_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: add.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB68_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: add.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB68_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: add.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB69_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: add.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB69_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: add.w $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB70_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amadd_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_fetch_add_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amadd_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: sub.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB72_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: sub.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB72_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: sub.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB73_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: sub.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB73_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: sub.w $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB74_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: sub.w $a2, $zero, $a1
++; LA64-NEXT: amadd_db.w $a1, $a2, $a0
++; LA64-NEXT: move $a0, $a1
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_fetch_sub_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: sub.d $a2, $zero, $a1
++; LA64-NEXT: amadd_db.d $a1, $a2, $a0
++; LA64-NEXT: move $a0, $a1
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: and $a5, $a4, $a1
++; LA32-NEXT: nor $a5, $a5, $zero
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB76_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a5, $a4, $a1
++; LA64-NEXT: nor $a5, $a5, $zero
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB76_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: and $a5, $a4, $a1
++; LA32-NEXT: nor $a5, $a5, $zero
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB77_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a5, $a4, $a1
++; LA64-NEXT: nor $a5, $a5, $zero
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB77_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a3, $a2, $a1
++; LA32-NEXT: nor $a3, $a3, $zero
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB78_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a2, $a0, 0
++; LA64-NEXT: and $a3, $a2, $a1
++; LA64-NEXT: nor $a3, $a3, $zero
++; LA64-NEXT: sc.w $a3, $a0, 0
++; LA64-NEXT: beqz $a3, .LBB78_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_fetch_nand_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.d $a2, $a0, 0
++; LA64-NEXT: and $a3, $a2, $a1
++; LA64-NEXT: nor $a3, $a3, $zero
++; LA64-NEXT: sc.d $a3, $a0, 0
++; LA64-NEXT: beqz $a3, .LBB79_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: slli.w $a2, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a2
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a2
++; LA32-NEXT: orn $a1, $a1, $a3
++; LA32-NEXT: addi.w $a3, $zero, -4
++; LA32-NEXT: and $a0, $a0, $a3
++; LA32-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a0, 0
++; LA32-NEXT: and $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a0, 0
++; LA32-NEXT: beqz $a4, .LBB80_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.d $a2, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a2
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a2
++; LA64-NEXT: orn $a1, $a1, $a3
++; LA64-NEXT: addi.w $a3, $zero, -4
++; LA64-NEXT: and $a0, $a0, $a3
++; LA64-NEXT: amand_db.w $a3, $a1, $a0
++; LA64-NEXT: srl.w $a0, $a3, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: slli.w $a3, $a0, 3
++; LA32-NEXT: sll.w $a2, $a2, $a3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a3
++; LA32-NEXT: orn $a1, $a1, $a2
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a0, $a0, $a2
++; LA32-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a4, $a2, $a1
++; LA32-NEXT: sc.w $a4, $a0, 0
++; LA32-NEXT: beqz $a4, .LBB81_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a2, $a3
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: slli.d $a3, $a0, 3
++; LA64-NEXT: sll.w $a2, $a2, $a3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a3
++; LA64-NEXT: orn $a1, $a1, $a2
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a0, $a0, $a2
++; LA64-NEXT: amand_db.w $a2, $a1, $a0
++; LA64-NEXT: srl.w $a0, $a2, $a3
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB82_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amand_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_fetch_and_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amand_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: or $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB84_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: or $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB85_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: or $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB86_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amor_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_or_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_fetch_or_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_or_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amor_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw or ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i8_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB88_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i8_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amxor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i8 %b acq_rel
++ ret i8 %1
++}
++
++define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i16_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a2, 0
++; LA32-NEXT: beqz $a4, .LBB89_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i16_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: amxor_db.w $a3, $a1, $a2
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i16 %b acq_rel
++ ret i16 %1
++}
++
++define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i32_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: xor $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB90_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i32_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amxor_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i32 %b acq_rel
++ ret i32 %1
++}
++
++define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_xor_i64_acq_rel:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 4
++; LA32-NEXT: bl %plt(__atomic_fetch_xor_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xor_i64_acq_rel:
++; LA64: # %bb.0:
++; LA64-NEXT: amxor_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xor ptr %a, i64 %b acq_rel
++ ret i64 %1
++}
++
++define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: addi.w $a5, $a1, 0
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB92_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: addi.w $a5, $a1, 0
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB92_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst
++ ret i8 %1
++}
++
++define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_0_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a2, $zero, 255
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $zero, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB93_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_0_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a2, $zero, 255
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $zero, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB93_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst
++ ret i8 %1
++}
++
++define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a2, $zero, 255
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB94_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a2, $zero, 255
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $a2, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB94_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst
++ ret i8 %1
++}
++
++define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: addi.w $a5, $a1, 0
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB95_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: addi.w $a5, $a1, 0
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB95_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst
++ ret i16 %1
++}
++
++define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_0_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $zero, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB96_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $zero, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB96_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst
++ ret i16 %1
++}
++
++define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
++; LA32-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a1, $zero, -4
++; LA32-NEXT: and $a1, $a0, $a1
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: sll.w $a2, $a2, $a0
++; LA32-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a1, 0
++; LA32-NEXT: addi.w $a4, $a2, 0
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: and $a4, $a4, $a2
++; LA32-NEXT: xor $a4, $a3, $a4
++; LA32-NEXT: sc.w $a4, $a1, 0
++; LA32-NEXT: beqz $a4, .LBB97_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a1, $zero, -4
++; LA64-NEXT: and $a1, $a0, $a1
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: sll.w $a2, $a2, $a0
++; LA64-NEXT: addi.w $a2, $a2, 0
++; LA64-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a3, $a1, 0
++; LA64-NEXT: addi.w $a4, $a2, 0
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: and $a4, $a4, $a2
++; LA64-NEXT: xor $a4, $a3, $a4
++; LA64-NEXT: sc.w $a4, $a1, 0
++; LA64-NEXT: beqz $a4, .LBB97_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a3, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst
++ ret i16 %1
++}
++
++define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i32_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: move $a3, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB98_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i32_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: amswap_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst
++ ret i32 %1
++}
++
++define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_xchg_i64_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 5
++; LA32-NEXT: bl %plt(__atomic_exchange_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_xchg_i64_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: amswap_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw xchg ptr %a, i64 %b seq_cst
++ ret i64 %1
++}
++
++define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: add.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB100_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: add.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB100_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i8 %b seq_cst
++ ret i8 %1
++}
++
++define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: add.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB101_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: add.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB101_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i16 %b seq_cst
++ ret i16 %1
++}
++
++define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i32_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: add.w $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB102_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i32_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: amadd_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i32 %b seq_cst
++ ret i32 %1
++}
++
++define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_add_i64_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 5
++; LA32-NEXT: bl %plt(__atomic_fetch_add_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_add_i64_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: amadd_db.d $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw add ptr %a, i64 %b seq_cst
++ ret i64 %1
++}
++
++define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: sub.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB104_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: sub.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB104_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i8 %b seq_cst
++ ret i8 %1
++}
++
++define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: sub.w $a5, $a4, $a1
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB105_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: sub.w $a5, $a4, $a1
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB105_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i16 %b seq_cst
++ ret i16 %1
++}
++
++define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i32_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: sub.w $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB106_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i32_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: sub.w $a2, $zero, $a1
++; LA64-NEXT: amadd_db.w $a1, $a2, $a0
++; LA64-NEXT: move $a0, $a1
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i32 %b seq_cst
++ ret i32 %1
++}
++
++define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_sub_i64_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 5
++; LA32-NEXT: bl %plt(__atomic_fetch_sub_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_sub_i64_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: sub.d $a2, $zero, $a1
++; LA64-NEXT: amadd_db.d $a1, $a2, $a0
++; LA64-NEXT: move $a0, $a1
++; LA64-NEXT: ret
++ %1 = atomicrmw sub ptr %a, i64 %b seq_cst
++ ret i64 %1
++}
++
++define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: and $a5, $a4, $a1
++; LA32-NEXT: nor $a5, $a5, $zero
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB108_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a5, $a4, $a1
++; LA64-NEXT: nor $a5, $a5, $zero
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB108_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i8 %b seq_cst
++ ret i8 %1
++}
++
++define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a2, $a0, $a2
++; LA32-NEXT: slli.w $a0, $a0, 3
++; LA32-NEXT: lu12i.w $a3, 15
++; LA32-NEXT: ori $a3, $a3, 4095
++; LA32-NEXT: sll.w $a3, $a3, $a0
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a0
++; LA32-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a4, $a2, 0
++; LA32-NEXT: and $a5, $a4, $a1
++; LA32-NEXT: nor $a5, $a5, $zero
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: and $a5, $a5, $a3
++; LA32-NEXT: xor $a5, $a4, $a5
++; LA32-NEXT: sc.w $a5, $a2, 0
++; LA32-NEXT: beqz $a5, .LBB109_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a4, $a0
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a2, $a0, $a2
++; LA64-NEXT: slli.d $a0, $a0, 3
++; LA64-NEXT: lu12i.w $a3, 15
++; LA64-NEXT: ori $a3, $a3, 4095
++; LA64-NEXT: sll.w $a3, $a3, $a0
++; LA64-NEXT: addi.w $a3, $a3, 0
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a0
++; LA64-NEXT: addi.w $a1, $a1, 0
++; LA64-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a4, $a2, 0
++; LA64-NEXT: and $a5, $a4, $a1
++; LA64-NEXT: nor $a5, $a5, $zero
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: and $a5, $a5, $a3
++; LA64-NEXT: xor $a5, $a4, $a5
++; LA64-NEXT: sc.w $a5, $a2, 0
++; LA64-NEXT: beqz $a5, .LBB109_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: srl.w $a0, $a4, $a0
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i16 %b seq_cst
++ ret i16 %1
++}
++
++define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i32_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a3, $a2, $a1
++; LA32-NEXT: nor $a3, $a3, $zero
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB110_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i32_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $a2, $a0, 0
++; LA64-NEXT: and $a3, $a2, $a1
++; LA64-NEXT: nor $a3, $a3, $zero
++; LA64-NEXT: sc.w $a3, $a0, 0
++; LA64-NEXT: beqz $a3, .LBB110_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i32 %b seq_cst
++ ret i32 %1
++}
++
++define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
++; LA32-LABEL: atomicrmw_nand_i64_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: addi.w $sp, $sp, -16
++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
++; LA32-NEXT: ori $a3, $zero, 5
++; LA32-NEXT: bl %plt(__atomic_fetch_nand_8)
++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
++; LA32-NEXT: addi.w $sp, $sp, 16
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_nand_i64_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.d $a2, $a0, 0
++; LA64-NEXT: and $a3, $a2, $a1
++; LA64-NEXT: nor $a3, $a3, $zero
++; LA64-NEXT: sc.d $a3, $a0, 0
++; LA64-NEXT: beqz $a3, .LBB111_1
++; LA64-NEXT: # %bb.2:
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw nand ptr %a, i64 %b seq_cst
++ ret i64 %1
++}
++
++define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i8_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: slli.w $a2, $a0, 3
++; LA32-NEXT: ori $a3, $zero, 255
++; LA32-NEXT: sll.w $a3, $a3, $a2
++; LA32-NEXT: andi $a1, $a1, 255
++; LA32-NEXT: sll.w $a1, $a1, $a2
++; LA32-NEXT: orn $a1, $a1, $a3
++; LA32-NEXT: addi.w $a3, $zero, -4
++; LA32-NEXT: and $a0, $a0, $a3
++; LA32-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a3, $a0, 0
++; LA32-NEXT: and $a4, $a3, $a1
++; LA32-NEXT: sc.w $a4, $a0, 0
++; LA32-NEXT: beqz $a4, .LBB112_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a3, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i8_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.d $a2, $a0, 3
++; LA64-NEXT: ori $a3, $zero, 255
++; LA64-NEXT: sll.w $a3, $a3, $a2
++; LA64-NEXT: andi $a1, $a1, 255
++; LA64-NEXT: sll.w $a1, $a1, $a2
++; LA64-NEXT: orn $a1, $a1, $a3
++; LA64-NEXT: addi.w $a3, $zero, -4
++; LA64-NEXT: and $a0, $a0, $a3
++; LA64-NEXT: amand_db.w $a3, $a1, $a0
++; LA64-NEXT: srl.w $a0, $a3, $a2
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i8 %b seq_cst
++ ret i8 %1
++}
++
++define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i16_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: lu12i.w $a2, 15
++; LA32-NEXT: ori $a2, $a2, 4095
++; LA32-NEXT: slli.w $a3, $a0, 3
++; LA32-NEXT: sll.w $a2, $a2, $a3
++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
++; LA32-NEXT: sll.w $a1, $a1, $a3
++; LA32-NEXT: orn $a1, $a1, $a2
++; LA32-NEXT: addi.w $a2, $zero, -4
++; LA32-NEXT: and $a0, $a0, $a2
++; LA32-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a4, $a2, $a1
++; LA32-NEXT: sc.w $a4, $a0, 0
++; LA32-NEXT: beqz $a4, .LBB113_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: srl.w $a0, $a2, $a3
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i16_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: lu12i.w $a2, 15
++; LA64-NEXT: ori $a2, $a2, 4095
++; LA64-NEXT: slli.d $a3, $a0, 3
++; LA64-NEXT: sll.w $a2, $a2, $a3
++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
++; LA64-NEXT: sll.w $a1, $a1, $a3
++; LA64-NEXT: orn $a1, $a1, $a2
++; LA64-NEXT: addi.w $a2, $zero, -4
++; LA64-NEXT: and $a0, $a0, $a2
++; LA64-NEXT: amand_db.w $a2, $a1, $a0
++; LA64-NEXT: srl.w $a0, $a2, $a3
++; LA64-NEXT: ret
++ %1 = atomicrmw and ptr %a, i16 %b seq_cst
++ ret i16 %1
++}
++
++define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind {
++; LA32-LABEL: atomicrmw_and_i32_seq_cst:
++; LA32: # %bb.0:
++; LA32-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
++; LA32-NEXT: ll.w $a2, $a0, 0
++; LA32-NEXT: and $a3, $a2, $a1
++; LA32-NEXT: sc.w $a3, $a0, 0
++; LA32-NEXT: beqz $a3, .LBB114_1
++; LA32-NEXT: # %bb.2:
++; LA32-NEXT: move $a0, $a2
++; LA32-NEXT: ret
++;
++; LA64-LABEL: atomicrmw_and_i32_seq_cst:
++; LA64: # %bb.0:
++; LA64-NEXT: amand_db.w $a2, $a1, $a0
++; LA64-NEXT: move $a0, $a2
++; LA64-NEXT: ret
++ %1
= atomicrmw and ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_and_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB116_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB117_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: or $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB118_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_or_8) ++; LA32-NEXT: ld.w 
$ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB120_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB121_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: xor $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB122_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ + define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, 
i8 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i8_monotonic: + ; LA32: # %bb.0: +@@ -910,14 +4132,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB28_1 ++; LA32-NEXT: beqz $a5, .LBB124_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -933,14 +4155,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB28_1 ++; LA64-NEXT: beqz $a5, .LBB124_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -959,14 +4181,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB29_1 ++; LA32-NEXT: beqz $a5, .LBB125_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -983,14 +4205,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB29_1 ++; LA64-NEXT: beqz $a5, .LBB125_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1001,11 +4223,11 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: move $a3, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB30_1 ++; LA32-NEXT: beqz $a3, .LBB126_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1049,14 +4271,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB32_1: # =>This 
Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB32_1 ++; LA32-NEXT: beqz $a5, .LBB128_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1072,14 +4294,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB32_1 ++; LA64-NEXT: beqz $a5, .LBB128_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1098,14 +4320,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB33_1 ++; LA32-NEXT: beqz $a5, .LBB129_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1122,14 +4344,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB33_1 ++; LA64-NEXT: beqz $a5, .LBB129_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1140,11 +4362,11 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_add_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: add.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB34_1 ++; LA32-NEXT: beqz $a3, .LBB130_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1188,14 +4410,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: 
beqz $a5, .LBB36_1 ++; LA32-NEXT: beqz $a5, .LBB132_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1211,14 +4433,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB36_1 ++; LA64-NEXT: beqz $a5, .LBB132_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1237,14 +4459,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB37_1 ++; LA32-NEXT: beqz $a5, .LBB133_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1261,14 +4483,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB37_1 ++; LA64-NEXT: beqz $a5, .LBB133_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1279,11 +4501,11 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_sub_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: sub.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB38_1 ++; LA32-NEXT: beqz $a3, .LBB134_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1329,7 +4551,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -1337,7 +4559,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB40_1 ++; LA32-NEXT: beqz $a5, .LBB136_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1353,7 +4575,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) 
nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -1361,7 +4583,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB40_1 ++; LA64-NEXT: beqz $a5, .LBB136_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1380,7 +4602,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -1388,7 +4610,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB41_1 ++; LA32-NEXT: beqz $a5, .LBB137_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1405,7 +4627,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -1413,7 +4635,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB41_1 ++; LA64-NEXT: beqz $a5, .LBB137_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1424,24 +4646,24 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_nand_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: nor $a3, $a3, $zero + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB42_1 ++; LA32-NEXT: beqz $a3, .LBB138_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: atomicrmw_nand_i32_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero + ; LA64-NEXT: sc.w $a3, $a0, 0 +-; LA64-NEXT: beqz $a3, .LBB42_1 ++; LA64-NEXT: beqz $a3, .LBB138_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +@@ -1462,12 +4684,12 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { + ; + ; LA64-LABEL: atomicrmw_nand_i64_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB139_1: # =>This Inner Loop 
Header: Depth=1 + ; LA64-NEXT: ll.d $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero + ; LA64-NEXT: sc.d $a3, $a0, 0 +-; LA64-NEXT: beqz $a3, .LBB43_1 ++; LA64-NEXT: beqz $a3, .LBB139_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +@@ -1486,11 +4708,11 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: orn $a1, $a1, $a3 + ; LA32-NEXT: addi.w $a3, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a3 +-; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a0, 0 + ; LA32-NEXT: and $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +-; LA32-NEXT: beqz $a4, .LBB44_1 ++; LA32-NEXT: beqz $a4, .LBB140_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a2 + ; LA32-NEXT: ret +@@ -1524,11 +4746,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: orn $a1, $a1, $a2 + ; LA32-NEXT: addi.w $a2, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a2 +-; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a4, $a2, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +-; LA32-NEXT: beqz $a4, .LBB45_1 ++; LA32-NEXT: beqz $a4, .LBB141_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a2, $a3 + ; LA32-NEXT: ret +@@ -1554,11 +4776,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_and_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB46_1 ++; LA32-NEXT: beqz $a3, .LBB142_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1600,11 +4822,11 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB48_1 ++; LA32-NEXT: beqz $a4, .LBB144_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1631,11 +4853,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB49_1 ++; LA32-NEXT: beqz $a4, .LBB145_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1657,11 +4879,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_or_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: or $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, 
$a0, 0 +-; LA32-NEXT: beqz $a3, .LBB50_1 ++; LA32-NEXT: beqz $a3, .LBB146_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1703,11 +4925,11 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB52_1 ++; LA32-NEXT: beqz $a4, .LBB148_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1734,11 +4956,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB53_1 ++; LA32-NEXT: beqz $a4, .LBB149_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1760,11 +4982,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xor_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: xor $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB54_1 ++; LA32-NEXT: beqz $a3, .LBB150_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +new file mode 100644 +index 000000000000..8d6056bc7677 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +@@ -0,0 +1,17 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define void @fence_singlethread() { ++; LA32-LABEL: fence_singlethread: ++; LA32: # %bb.0: ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: fence_singlethread: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ fence syncscope("singlethread") seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +index e91d0c145eab..deff11723d27 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +@@ -72,6 +72,202 @@ define i64 @load_acquire_i64(ptr %ptr) { + ret i64 %val + } + ++define i8 @load_unordered_i8(ptr %ptr) { ++; LA32-LABEL: load_unordered_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i8: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr unordered, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_unordered_i16(ptr %ptr) { ++; LA32-LABEL: load_unordered_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: 
ld.h $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i16: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i16, ptr %ptr unordered, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_unordered_i32(ptr %ptr) { ++; LA32-LABEL: load_unordered_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i32: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i32, ptr %ptr unordered, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_unordered_i64(ptr %ptr) { ++; LA32-LABEL: load_unordered_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: move $a1, $zero ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr unordered, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_monotonic_i8(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i8: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr monotonic, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_monotonic_i16(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.h $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i16: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i16, ptr %ptr monotonic, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_monotonic_i32(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i32: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i32, ptr %ptr monotonic, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_monotonic_i64(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: move $a1, $zero ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr monotonic, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_seq_cst_i8(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i8: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr seq_cst, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_seq_cst_i16(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.h $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i16: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i16, ptr %ptr seq_cst, align 
2 ++ ret i16 %val ++} ++ ++define i32 @load_seq_cst_i32(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i32: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i32, ptr %ptr seq_cst, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_seq_cst_i64(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: ori $a1, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr seq_cst, align 8 ++ ret i64 %val ++} ++ + define void @store_release_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_release_i8: + ; LA32: # %bb.0: +-- +2.20.1 + + +From 0f189600f07f701d96940c2cc52ca762d2be9104 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui <git@xen0n.name> +Date: Wed, 11 Oct 2023 10:39:13 +0800 +Subject: PATCH 3/7 LoongArch Support finer-grained DBAR hints for LA664+ + (#68787) + +These are treated as DBAR 0 on older uarchs, so we can start to +unconditionally emit the new hints right away. + +Co-authored-by: WANG Rui <wangrui@loongson.cn> +(cherry picked from commit 956482de13107b640cffedd08610fcccd98f708f) +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- + .../LoongArch/LoongArchISelLowering.cpp | 20 +++++++ + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../Target/LoongArch/LoongArchInstrInfo.td | 24 +++++++- + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 16 ++--- + .../ir-instruction/atomic-cmpxchg.ll | 24 ++++---- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 48 +++++++-------- + .../ir-instruction/fence-singlethread.ll | 4 +- + .../CodeGen/LoongArch/ir-instruction/fence.ll | 16 ++--- + .../ir-instruction/load-store-atomic.ll | 58 +++++++++---------- + 10 files changed, 129 insertions(+), 86 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index eb78ef065b21..b348cb56c136 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -579,8 +579,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: +- // TODO: acquire +- hint = 0; ++ // acquire ++ hint = 0b10100; + break; + default: + hint = 0x700; +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 5affaf37ad5a..33a3197013cc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -159,6 +159,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // The MULO libcall is not part of libgcc, only compiler-rt. 
+ setLibcallName(RTLIB::MULO_I128, nullptr); + ++ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); ++ + static const ISD::CondCode FPCCToExpand = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; +@@ -366,6 +368,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal( + SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { ++ case ISD::ATOMIC_FENCE: ++ return lowerATOMIC_FENCE(Op, DAG); + case ISD::EH_DWARF_CFA: + return lowerEH_DWARF_CFA(Op, DAG); + case ISD::GlobalAddress: +@@ -542,6 +546,22 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + return SDValue(); + } + ++SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ SyncScope::ID FenceSSID = ++ static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); ++ ++ // singlethread fences only synchronize with signal handlers on the same ++ // thread and thus only need to preserve instruction order, not actually ++ // enforce memory ordering. ++ if (FenceSSID == SyncScope::SingleThread) ++ // MEMBARRIER is a compiler barrier; it codegens to a no-op. ++ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); ++ ++ return Op; ++} ++ + SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, + SelectionDAG &DAG) const { + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 6b5a851ec55d..23b90640a690 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -266,6 +266,7 @@ private: + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; ++ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index a9b0db30c2f6..fcbd314507a5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1590,7 +1590,29 @@ def : RegRegStPat<store, STX_D, GPR, i64>; + + /// Atomic loads and stores + +-def : Pat<(atomic_fence timm, timm), (DBAR 0)>; ++// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++// the Linux patch revealing it 1: ++// ++// - Bit 4: kind of constraint (0: completion, 1: ordering) ++// - Bit 3: barrier for previous read (0: true, 1: false) ++// - Bit 2: barrier for previous write (0: true, 1: false) ++// - Bit 1: barrier for succeeding read (0: true, 1: false) ++// - Bit 0: barrier for succeeding write (0: true, 1: false) ++// ++// Hint 0x700: barrier for "read after read" from the same address, which is ++// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as ++// nop if such reordering is disabled on supporting newer models.) ++// ++// 1: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ ++// ++// Implementations without support for the finer-granularity hints simply treat ++// all as the full barrier (DBAR 0), so we can unconditionally start emiting the ++// more precise hints right away. 
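(The hint values that appear in the test updates further down, dbar 20 for acquire, dbar 18 for release, and dbar 16 for acq_rel and seq_cst, follow directly from the bit layout described in the comment above. As a rough illustration only, not part of the patch itself, the encoding can be sketched in C++ as below; the helper name is invented for this example and does not exist in LLVM.)

#include <cstdint>

// Sketch of the LA664 DBAR ordering-hint encoding from the comment above.
// Bit 4 selects an ordering (rather than completion) constraint; bits 3..0
// are *set* when the barrier does NOT apply to previous reads, previous
// writes, succeeding reads, and succeeding writes, respectively.
constexpr uint32_t dbarHint(bool prevRead, bool prevWrite,
                            bool succRead, bool succWrite) {
  uint32_t hint = 1u << 4;          // ordering kind
  if (!prevRead)  hint |= 1u << 3;  // no barrier for previous reads
  if (!prevWrite) hint |= 1u << 2;  // no barrier for previous writes
  if (!succRead)  hint |= 1u << 1;  // no barrier for succeeding reads
  if (!succWrite) hint |= 1u << 0;  // no barrier for succeeding writes
  return hint;
}

// Matches the values selected by the patterns just below and checked in the
// fence.ll diff later in this patch:
static_assert(dbarHint(true, false, true, true) == 0b10100, "acquire  -> 20");
static_assert(dbarHint(true, true, false, true) == 0b10010, "release  -> 18");
static_assert(dbarHint(true, true, true, true)  == 0b10000, "acq_rel/seq_cst -> 16");

(On micro-architectures without support for the finer-grained hints, all of these decode as the full barrier, equivalent to DBAR 0, which is why the patch can emit the precise hints unconditionally.)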
++ ++def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire ++def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release ++def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel ++def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst + + defm : LdPat<atomic_load_8, LD_B>; + defm : LdPat<atomic_load_16, LD_H>; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index 32106886c783..d8908acbc945 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -40,7 +40,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: b .LBB0_6 + ; LA64-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -93,7 +93,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: b .LBB1_6 + ; LA64-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -133,7 +133,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: move $a3, $a1 +@@ -171,7 +171,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: b .LBB3_6 + ; LA64-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB3_1 +@@ -226,7 +226,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: b .LBB4_6 + ; LA64-NEXT: .LBB4_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -284,7 +284,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: b .LBB5_6 + ; LA64-NEXT: .LBB5_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -329,7 +329,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 +@@ -372,7 +372,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: b .LBB7_6 + ; LA64-NEXT: .LBB7_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB7_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB7_1 +diff --git 
a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 1ac20d10e587..4f25a1d69af1 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -27,7 +27,7 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: b .LBB0_4 + ; LA64-NEXT: .LBB0_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -61,7 +61,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: b .LBB1_4 + ; LA64-NEXT: .LBB1_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -80,7 +80,7 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind + ; LA64-NEXT: beqz $a4, .LBB2_1 + ; LA64-NEXT: b .LBB2_4 + ; LA64-NEXT: .LBB2_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -99,7 +99,7 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ; LA64-NEXT: beqz $a4, .LBB3_1 + ; LA64-NEXT: b .LBB3_4 + ; LA64-NEXT: .LBB3_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB3_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -132,7 +132,7 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: beqz $a6, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -168,7 +168,7 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: beqz $a6, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -189,7 +189,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -210,7 +210,7 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -245,7 +245,7 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: b .LBB8_4 + ; LA64-NEXT: .LBB8_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -284,7 +284,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: b .LBB9_4 + ; LA64-NEXT: .LBB9_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -308,7 +308,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr 
%ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: beqz $a4, .LBB10_1 + ; LA64-NEXT: b .LBB10_4 + ; LA64-NEXT: .LBB10_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 +@@ -331,7 +331,7 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + ; LA64-NEXT: beqz $a4, .LBB11_1 + ; LA64-NEXT: b .LBB11_4 + ; LA64-NEXT: .LBB11_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 02d481cb3865..589360823b14 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -29,7 +29,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB0_6 + ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -64,7 +64,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB0_6 + ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -103,7 +103,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB1_6 + ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -138,7 +138,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB1_6 + ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -178,7 +178,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB2_6 + ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -214,7 +214,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB2_6 + ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -254,7 +254,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB3_6 + ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -290,7 +290,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB3_6 + ; 
LA64D-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1385,7 +1385,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB16_6 + ; LA64F-NEXT: .LBB16_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1420,7 +1420,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB16_6 + ; LA64D-NEXT: .LBB16_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1459,7 +1459,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB17_6 + ; LA64F-NEXT: .LBB17_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1494,7 +1494,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB17_6 + ; LA64D-NEXT: .LBB17_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1534,7 +1534,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB18_6 + ; LA64F-NEXT: .LBB18_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1570,7 +1570,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB18_6 + ; LA64D-NEXT: .LBB18_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1610,7 +1610,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB19_6 + ; LA64F-NEXT: .LBB19_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1646,7 +1646,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB19_6 + ; LA64D-NEXT: .LBB19_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2087,7 +2087,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB24_6 + ; LA64F-NEXT: .LBB24_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start + ; 
LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2122,7 +2122,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB24_6 + ; LA64D-NEXT: .LBB24_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2161,7 +2161,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB25_6 + ; LA64F-NEXT: .LBB25_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2196,7 +2196,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB25_6 + ; LA64D-NEXT: .LBB25_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2236,7 +2236,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB26_6 + ; LA64F-NEXT: .LBB26_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2272,7 +2272,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB26_6 + ; LA64D-NEXT: .LBB26_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2312,7 +2312,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB27_6 + ; LA64F-NEXT: .LBB27_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2348,7 +2348,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB27_6 + ; LA64D-NEXT: .LBB27_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +index 8d6056bc7677..a8b164a4cd3c 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +@@ -5,12 +5,12 @@ + define void @fence_singlethread() { + ; LA32-LABEL: fence_singlethread: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: #MEMBARRIER + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_singlethread: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: #MEMBARRIER + ; LA64-NEXT: ret + fence syncscope("singlethread") seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +index 724639f3c6fb..c5b2232f9b80 100644 
+--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +@@ -5,12 +5,12 @@ + define void @fence_acquire() nounwind { + ; LA32-LABEL: fence_acquire: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_acquire: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + fence acquire + ret void +@@ -19,12 +19,12 @@ define void @fence_acquire() nounwind { + define void @fence_release() nounwind { + ; LA32-LABEL: fence_release: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_release: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: ret + fence release + ret void +@@ -33,12 +33,12 @@ define void @fence_release() nounwind { + define void @fence_acq_rel() nounwind { + ; LA32-LABEL: fence_acq_rel: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_acq_rel: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + fence acq_rel + ret void +@@ -47,12 +47,12 @@ define void @fence_acq_rel() nounwind { + define void @fence_seq_cst() nounwind { + ; LA32-LABEL: fence_seq_cst: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_seq_cst: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + fence seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +index deff11723d27..8b170c479eed 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +@@ -6,13 +6,13 @@ define i8 @load_acquire_i8(ptr %ptr) { + ; LA32-LABEL: load_acquire_i8: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.b $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i8: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i8, ptr %ptr acquire, align 1 + ret i8 %val +@@ -22,13 +22,13 @@ define i16 @load_acquire_i16(ptr %ptr) { + ; LA32-LABEL: load_acquire_i16: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i16: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i16, ptr %ptr acquire, align 2 + ret i16 %val +@@ -38,13 +38,13 @@ define i32 @load_acquire_i32(ptr %ptr) { + ; LA32-LABEL: load_acquire_i32: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i32: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i32, ptr %ptr acquire, align 4 + ret i32 %val +@@ -66,7 +66,7 @@ define i64 @load_acquire_i64(ptr %ptr) { + ; LA64-LABEL: load_acquire_i64: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i64, ptr %ptr acquire, align 8 + ret i64 %val +@@ -202,13 +202,13 @@ define i8 @load_seq_cst_i8(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i8: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.b $a0, 
$a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i8: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i8, ptr %ptr seq_cst, align 1 + ret i8 %val +@@ -218,13 +218,13 @@ define i16 @load_seq_cst_i16(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i16: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i16: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i16, ptr %ptr seq_cst, align 2 + ret i16 %val +@@ -234,13 +234,13 @@ define i32 @load_seq_cst_i32(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i32: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i32: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i32, ptr %ptr seq_cst, align 4 + ret i32 %val +@@ -262,7 +262,7 @@ define i64 @load_seq_cst_i64(ptr %ptr) { + ; LA64-LABEL: load_seq_cst_i64: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i64, ptr %ptr seq_cst, align 8 + ret i64 %val +@@ -271,13 +271,13 @@ define i64 @load_seq_cst_i64(ptr %ptr) { + define void @store_release_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_release_i8: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.b $a1, $a0, 0 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_release_i8: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: st.b $a1, $a0, 0 + ; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr release, align 1 +@@ -287,13 +287,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { + define void @store_release_i16(ptr %ptr, i16 signext %v) { + ; LA32-LABEL: store_release_i16: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.h $a1, $a0, 0 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_release_i16: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: st.h $a1, $a0, 0 + ; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr release, align 2 +@@ -303,7 +303,7 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) { + define void @store_release_i32(ptr %ptr, i32 signext %v) { + ; LA32-LABEL: store_release_i32: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.w $a1, $a0, 0 + ; LA32-NEXT: ret + ; +@@ -465,16 +465,16 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_seq_cst_i8: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.b $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i8: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: st.b $a1, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr seq_cst, align 1 + ret void +@@ -483,16 +483,16 @@ define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { + define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { + ; LA32-LABEL: store_seq_cst_i16: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.h $a1, $a0, 0 
+-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i16: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: st.h $a1, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr seq_cst, align 2 + ret void +@@ -501,9 +501,9 @@ define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { + define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { + ; LA32-LABEL: store_seq_cst_i32: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.w $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i32: +-- +2.20.1 + + +From 7e37560cddfa108426ff9f87871c71ed01e7596e Mon Sep 17 00:00:00 2001 +From: Weining Lu <luweining@loongson.cn> +Date: Tue, 17 Oct 2023 17:41:32 +0800 +Subject: PATCH 4/7 LoongArch Precommit a test for atomic cmpxchg + optimization + +(cherry picked from commit b2773d170cb4bdb4b19ba801b5eb55395024b3ae) +--- + .../ir-instruction/atomic-cmpxchg.ll | 385 +++++++++++------- + 1 file changed, 245 insertions(+), 140 deletions(-) + +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 4f25a1d69af1..174bb9d0ff7d 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -106,6 +106,111 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ret void + } + ++define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB4_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++;
LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB5_1 ++; LA64-NEXT: b .LBB5_4 ++; LA64-NEXT: .LBB5_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB5_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB6_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB6_1 ++; LA64-NEXT: b .LBB6_4 ++; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB7_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB7_1 ++; LA64-NEXT: b .LBB7_4 ++; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic ++ ret void ++} ++ + define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: + ; LA64: # %bb.0: +@@ -121,19 +226,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB4_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB8_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB4_1 +-; LA64-NEXT: b .LBB4_4 +-; LA64-NEXT: .LBB4_3: ++; LA64-NEXT: beqz $a6, .LBB8_1 ++; LA64-NEXT: b .LBB8_4 ++; LA64-NEXT: .LBB8_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB4_4: ++; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -157,19 +262,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB5_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB9_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB5_1 +-; LA64-NEXT: b .LBB5_4 +-; LA64-NEXT: .LBB5_3: ++; LA64-NEXT: beqz $a6, .LBB9_1 ++; LA64-NEXT: b .LBB9_4 
++; LA64-NEXT: .LBB9_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB5_4: ++; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -180,17 +285,17 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB6_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB10_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB6_1 +-; LA64-NEXT: b .LBB6_4 +-; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: beqz $a4, .LBB10_1 ++; LA64-NEXT: b .LBB10_4 ++; LA64-NEXT: .LBB10_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -201,17 +306,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB7_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB11_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB7_1 +-; LA64-NEXT: b .LBB7_4 +-; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: beqz $a4, .LBB11_1 ++; LA64-NEXT: b .LBB11_4 ++; LA64-NEXT: .LBB11_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -234,19 +339,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB8_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB8_1 +-; LA64-NEXT: b .LBB8_4 +-; LA64-NEXT: .LBB8_3: ++; LA64-NEXT: beqz $a6, .LBB12_1 ++; LA64-NEXT: b .LBB12_4 ++; LA64-NEXT: .LBB12_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB8_4: ++; LA64-NEXT: .LBB12_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -273,19 +378,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB13_1: # 
=>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB9_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB9_1 +-; LA64-NEXT: b .LBB9_4 +-; LA64-NEXT: .LBB9_3: ++; LA64-NEXT: beqz $a6, .LBB13_1 ++; LA64-NEXT: b .LBB13_4 ++; LA64-NEXT: .LBB13_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB9_4: ++; LA64-NEXT: .LBB13_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -299,17 +404,17 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB10_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB10_1 +-; LA64-NEXT: b .LBB10_4 +-; LA64-NEXT: .LBB10_3: ++; LA64-NEXT: beqz $a4, .LBB14_1 ++; LA64-NEXT: b .LBB14_4 ++; LA64-NEXT: .LBB14_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB10_4: ++; LA64-NEXT: .LBB14_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -322,17 +427,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB11_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB11_1 +-; LA64-NEXT: b .LBB11_4 +-; LA64-NEXT: .LBB11_3: ++; LA64-NEXT: beqz $a4, .LBB15_1 ++; LA64-NEXT: b .LBB15_4 ++; LA64-NEXT: .LBB15_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB11_4: ++; LA64-NEXT: .LBB15_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret +@@ -356,19 +461,19 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a2, $a2, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a3, 0 + ; LA64-NEXT: and $a5, $a4, $a0 +-; LA64-NEXT: bne $a5, $a1, .LBB12_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: bne $a5, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 +-; LA64-NEXT: beqz $a5, .LBB12_1 +-; LA64-NEXT: b .LBB12_4 +-; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: beqz $a5, .LBB16_1 ++; LA64-NEXT: b .LBB16_4 
++; LA64-NEXT: .LBB16_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB12_4: ++; LA64-NEXT: .LBB16_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + ret void +@@ -390,19 +495,19 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a2, $a2, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a3, 0 + ; LA64-NEXT: and $a5, $a4, $a0 +-; LA64-NEXT: bne $a5, $a1, .LBB13_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: bne $a5, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 +-; LA64-NEXT: beqz $a5, .LBB13_1 +-; LA64-NEXT: b .LBB13_4 +-; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: beqz $a5, .LBB17_1 ++; LA64-NEXT: b .LBB17_4 ++; LA64-NEXT: .LBB17_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB13_4: ++; LA64-NEXT: .LBB17_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + ret void +@@ -411,17 +516,17 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB14_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB14_1 +-; LA64-NEXT: b .LBB14_4 +-; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: beqz $a4, .LBB18_1 ++; LA64-NEXT: b .LBB18_4 ++; LA64-NEXT: .LBB18_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB14_4: ++; LA64-NEXT: .LBB18_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + ret void +@@ -430,17 +535,17 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw + define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB15_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB15_1 +-; LA64-NEXT: b .LBB15_4 +-; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: beqz $a4, .LBB19_1 ++; LA64-NEXT: b .LBB19_4 ++; LA64-NEXT: .LBB19_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB15_4: ++; LA64-NEXT: .LBB19_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + ret void +@@ -461,19 +566,19 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: 
Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB16_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB16_1 +-; LA64-NEXT: b .LBB16_4 +-; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: beqz $a6, .LBB20_1 ++; LA64-NEXT: b .LBB20_4 ++; LA64-NEXT: .LBB20_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB16_4: ++; LA64-NEXT: .LBB20_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic +@@ -497,19 +602,19 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB17_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB17_1 +-; LA64-NEXT: b .LBB17_4 +-; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: beqz $a6, .LBB21_1 ++; LA64-NEXT: b .LBB21_4 ++; LA64-NEXT: .LBB21_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB17_4: ++; LA64-NEXT: .LBB21_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic +@@ -520,17 +625,17 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB18_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB18_1 +-; LA64-NEXT: b .LBB18_4 +-; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: beqz $a4, .LBB22_1 ++; LA64-NEXT: b .LBB22_4 ++; LA64-NEXT: .LBB22_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB18_4: ++; LA64-NEXT: .LBB22_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic +@@ -541,17 +646,17 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) + define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB19_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB19_1 +-; LA64-NEXT: b .LBB19_4 
+-; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: beqz $a4, .LBB23_1 ++; LA64-NEXT: b .LBB23_4 ++; LA64-NEXT: .LBB23_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB19_4: ++; LA64-NEXT: .LBB23_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic +@@ -574,19 +679,19 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB20_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB24_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB20_1 +-; LA64-NEXT: b .LBB20_4 +-; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: beqz $a6, .LBB24_1 ++; LA64-NEXT: b .LBB24_4 ++; LA64-NEXT: .LBB24_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB20_4: ++; LA64-NEXT: .LBB24_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -613,19 +718,19 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB21_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB25_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB21_1 +-; LA64-NEXT: b .LBB21_4 +-; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: beqz $a6, .LBB25_1 ++; LA64-NEXT: b .LBB25_4 ++; LA64-NEXT: .LBB25_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB21_4: ++; LA64-NEXT: .LBB25_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -639,17 +744,17 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB22_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB26_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB22_1 +-; LA64-NEXT: b .LBB22_4 +-; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: beqz $a4, .LBB26_1 ++; LA64-NEXT: b .LBB26_4 ++; LA64-NEXT: .LBB26_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB22_4: ++; LA64-NEXT: .LBB26_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -662,17 +767,17 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n + define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 
%val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB23_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB27_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB23_1 +-; LA64-NEXT: b .LBB23_4 +-; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: beqz $a4, .LBB27_1 ++; LA64-NEXT: b .LBB27_4 ++; LA64-NEXT: .LBB27_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB23_4: ++; LA64-NEXT: .LBB27_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret +-- +2.20.1 + + +From 331674f3553b747d9869276ae34667dce7099a09 Mon Sep 17 00:00:00 2001 +From: Lu Weining <90239436+SixWeining@users.noreply.github.com> +Date: Thu, 19 Oct 2023 09:21:51 +0800 +Subject: PATCH 5/7 LoongArch Improve codegen for atomic cmpxchg ops + (#69339) + +PR #67391 improved atomic codegen by handling memory ordering specified +by the `cmpxchg` instruction. An acquire barrier needs to be generated +when memory ordering includes an acquire operation. This PR improves the +codegen further by only handling the failure ordering. + +(cherry picked from commit 78abc45c44cdadf76b30e1f3dc24936bb5627d68) +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- + .../LoongArch/LoongArchISelLowering.cpp | 7 ++- + .../Target/LoongArch/LoongArchInstrInfo.td | 55 ++++++++++++++++--- + .../ir-instruction/atomic-cmpxchg.ll | 8 +-- + 4 files changed, 56 insertions(+), 18 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index b348cb56c136..18a532b55ee5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + +- AtomicOrdering Ordering = ++ AtomicOrdering FailureOrdering = + static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm()); + int hint; + +- switch (Ordering) { ++ switch (FailureOrdering) { + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 33a3197013cc..99328f09921f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -4492,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( + Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, + Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { +- Value *Ordering = +- Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord)); ++ AtomicOrdering FailOrd = CI->getFailureOrdering(); ++ Value *FailureOrdering = ++ Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd)); + + // TODO: Support cmpxchg on LA32. 
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; +@@ -4504,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + Function *MaskedCmpXchg = + Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); + Value *Result = Builder.CreateCall( +- MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); ++ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index fcbd314507a5..ab1890556814 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1753,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + + class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), +- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { ++ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1767,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; + def PseudoMaskedCmpXchg32 + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, +- grlenimm:$ordering)> { ++ grlenimm:$fail_order)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1785,6 +1785,43 @@ class AtomicPat<Intrinsic intrin, Pseudo AMInst> + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), + (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; + ++// These atomic cmpxchg PatFrags only care about the failure ordering. ++// The PatFrags defined by multiclass `ternary_atomic_op_ord` in ++// TargetSelectionDAG.td care about the merged memory ordering that is the ++// stronger one between success and failure. But for LoongArch LL-SC we only ++// need to care about the failure ordering as explained in PR #67391. So we ++// define these PatFrags that will be used to define cmpxchg pats below. 
++multiclass ternary_atomic_op_failure_ord { ++ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), { ++ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Monotonic; ++ }>; ++ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), { ++ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Acquire; ++ }>; ++ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), { ++ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Release; ++ }>; ++ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), { ++ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::AcquireRelease; ++ }>; ++ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), { ++ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::SequentiallyConsistent; ++ }>; ++} ++ ++defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; ++ + let Predicates = IsLA64 in { + def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64, + PseudoMaskedAtomicSwap32>; +@@ -1847,24 +1884,24 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64, + // AtomicOrdering.h. 
+ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst, + ValueType vt = GRLenVT> { +- def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; +- def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; +- def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; +- def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; +- def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; + } + + defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; + defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; + def : Pat<(int_loongarch_masked_cmpxchg_i64 +- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), ++ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order), + (PseudoMaskedCmpXchg32 +- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; ++ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>; + + def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64, + PseudoMaskedAtomicLoadMax32>; +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 174bb9d0ff7d..1dd3f39852d8 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -132,7 +132,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: beqz $a5, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic +@@ -166,7 +166,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin + ; LA64-NEXT: beqz $a5, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic +@@ -185,7 +185,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic +@@ -204,7 +204,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic +-- +2.20.1 + + +From d8479f9765b12a84d1756aedebf631fdbe4f0451 Mon Sep 17 00:00:00 2001 
+From: Lu Weining <luweining@loongson.cn> +Date: Mon, 4 Mar 2024 08:38:52 +0800 +Subject: PATCH 6/7 LoongArch Override + LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (#83656) + +This patch aims to solve Firefox issue: +https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 + +Similar to 616289ed2922. Currently LoongArch uses an ll.[w|d]/sc.[w|d] +loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is +full-width (i.e. the `bne` instruction), we must sign extend the input +comparison argument. + +Note that LoongArch ISA manual V1.1 has introduced compare-and-swap +instructions. We would change the implementation (return `ANY_EXTEND`) +when we support them. + +(cherry picked from commit 5f058aa211995d2f0df2a0e063532832569cb7a8) +(cherry picked from commit ea6c457b8dd2d0e6a7f05b4a5bdd2686085e1ec0) +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 + + .../Target/LoongArch/LoongArchISelLowering.h | 2 + + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 120 +++++++------ + .../ir-instruction/atomic-cmpxchg.ll | 25 +-- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 160 +++++++++--------- + 5 files changed, 159 insertions(+), 153 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 99328f09921f..4fc2b4709840 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -4893,3 +4893,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { + + return !isa<ConstantSDNode>(Y); + } ++ ++ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { ++ // TODO: LAMCAS will use amcas{_DB,}.bhwd which does not require extension. ++ return ISD::SIGN_EXTEND; ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 23b90640a690..2c9826a13237 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -203,6 +203,8 @@ public: + return ISD::SIGN_EXTEND; + } + ++ ISD::NodeType getExtendForAtomicCmpSwapArg() const override; ++ + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index d8908acbc945..f0baf19bcf0e 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -26,15 +26,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: andi $a5, $a5, 255 + ; LA64-NEXT: sll.w $a5, $a5, $a0 + ; LA64-NEXT: and $a6, $a3, $a4 +-; LA64-NEXT: or $a6, $a6, $a5 ++; LA64-NEXT: or $a5, $a6, $a5 ++; LA64-NEXT: addi.w $a6, $a3, 0 + ; LA64-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a5, $a2, 0 +-; LA64-NEXT: bne $a5, $a3, .LBB0_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a6, .LBB0_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 ++; LA64-NEXT: move $a7, $a5 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB0_3 + ; LA64-NEXT: b .LBB0_6 +@@ -43,11 +44,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_6: #
%atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: addi.w $a6, $a3, 0 +-; LA64-NEXT: move $a3, $a5 +-; LA64-NEXT: bne $a5, $a6, .LBB0_1 ++; LA64-NEXT: bne $a3, $a6, .LBB0_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +@@ -79,15 +78,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 + ; LA64-NEXT: sll.w $a5, $a5, $a0 + ; LA64-NEXT: and $a6, $a3, $a4 +-; LA64-NEXT: or $a6, $a6, $a5 ++; LA64-NEXT: or $a5, $a6, $a5 ++; LA64-NEXT: addi.w $a6, $a3, 0 + ; LA64-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a5, $a2, 0 +-; LA64-NEXT: bne $a5, $a3, .LBB1_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a6, .LBB1_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 ++; LA64-NEXT: move $a7, $a5 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB1_3 + ; LA64-NEXT: b .LBB1_6 +@@ -96,11 +96,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: addi.w $a6, $a3, 0 +-; LA64-NEXT: move $a3, $a5 +-; LA64-NEXT: bne $a5, $a6, .LBB1_1 ++; LA64-NEXT: bne $a3, $a6, .LBB1_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +@@ -109,37 +107,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-LABEL: atomicrmw_uinc_wrap_i32: + ; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a3, $a0, 0 +-; LA64-NEXT: addi.w $a2, $a1, 0 ++; LA64-NEXT: ld.w $a2, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .p2align 4, , 16 + ; LA64-NEXT: .LBB2_1: # %atomicrmw.start + ; LA64-NEXT: # =>This Loop Header: Depth=1 + ; LA64-NEXT: # Child Loop BB2_3 Depth 2 +-; LA64-NEXT: addi.w $a4, $a3, 0 +-; LA64-NEXT: sltu $a1, $a4, $a2 +-; LA64-NEXT: xori $a1, $a1, 1 +-; LA64-NEXT: addi.d $a5, $a3, 1 +-; LA64-NEXT: masknez $a5, $a5, $a1 ++; LA64-NEXT: addi.w $a3, $a2, 0 ++; LA64-NEXT: sltu $a4, $a3, $a1 ++; LA64-NEXT: xori $a4, $a4, 1 ++; LA64-NEXT: addi.d $a2, $a2, 1 ++; LA64-NEXT: masknez $a4, $a2, $a4 + ; LA64-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a1, $a0, 0 +-; LA64-NEXT: bne $a1, $a3, .LBB2_5 ++; LA64-NEXT: ll.w $a2, $a0, 0 ++; LA64-NEXT: bne $a2, $a3, .LBB2_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64-NEXT: move $a6, $a5 +-; LA64-NEXT: sc.w $a6, $a0, 0 +-; LA64-NEXT: beqz $a6, .LBB2_3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: sc.w $a5, $a0, 0 ++; LA64-NEXT: beqz $a5, .LBB2_3 + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: move $a3, $a1 +-; LA64-NEXT: bne $a1, $a4, .LBB2_1 ++; LA64-NEXT: bne $a2, $a3, .LBB2_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: move 
$a0, $a2 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +@@ -212,15 +209,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: andi $a6, $a6, 255 + ; LA64-NEXT: sll.w $a6, $a6, $a0 + ; LA64-NEXT: and $a7, $a3, $a4 +-; LA64-NEXT: or $a7, $a7, $a6 ++; LA64-NEXT: or $a6, $a7, $a6 ++; LA64-NEXT: addi.w $a7, $a3, 0 + ; LA64-NEXT: .LBB4_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a6, $a2, 0 +-; LA64-NEXT: bne $a6, $a3, .LBB4_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a7, .LBB4_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +-; LA64-NEXT: move $t0, $a7 ++; LA64-NEXT: move $t0, $a6 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB4_3 + ; LA64-NEXT: b .LBB4_6 +@@ -229,11 +227,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: addi.w $a7, $a3, 0 +-; LA64-NEXT: move $a3, $a6 +-; LA64-NEXT: bne $a6, $a7, .LBB4_1 ++; LA64-NEXT: bne $a3, $a7, .LBB4_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a6, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +@@ -270,15 +266,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 + ; LA64-NEXT: sll.w $a6, $a6, $a0 + ; LA64-NEXT: and $a7, $a3, $a4 +-; LA64-NEXT: or $a7, $a7, $a6 ++; LA64-NEXT: or $a6, $a7, $a6 ++; LA64-NEXT: addi.w $a7, $a3, 0 + ; LA64-NEXT: .LBB5_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a6, $a2, 0 +-; LA64-NEXT: bne $a6, $a3, .LBB5_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a7, .LBB5_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +-; LA64-NEXT: move $t0, $a7 ++; LA64-NEXT: move $t0, $a6 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB5_3 + ; LA64-NEXT: b .LBB5_6 +@@ -287,11 +284,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: addi.w $a7, $a3, 0 +-; LA64-NEXT: move $a3, $a6 +-; LA64-NEXT: bne $a6, $a7, .LBB5_1 ++; LA64-NEXT: bne $a3, $a7, .LBB5_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a6, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +@@ -300,22 +295,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-LABEL: atomicrmw_udec_wrap_i32: + ; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a4, $a0, 0 ++; LA64-NEXT: ld.w $a2, $a0, 0 + ; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .p2align 4, , 16 + ; LA64-NEXT: .LBB6_1: # %atomicrmw.start + ; LA64-NEXT: # =>This Loop Header: Depth=1 + ; LA64-NEXT: # Child Loop BB6_3 Depth 2 +-; LA64-NEXT: addi.w $a5, $a4, 0 +-; LA64-NEXT: sltu $a2, $a3, $a5 +-; LA64-NEXT: addi.d $a6, $a4, -1 +-; LA64-NEXT: masknez $a6, $a6, $a2 +-; LA64-NEXT: maskeqz $a2, $a1, $a2 +-; LA64-NEXT: or $a2, $a2, $a6 +-; LA64-NEXT: sltui $a6, $a5, 1 +-; LA64-NEXT: masknez $a2, $a2, $a6 +-; LA64-NEXT: maskeqz $a6, $a1, $a6 +-; 
LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: sltu $a5, $a3, $a4 ++; LA64-NEXT: addi.d $a2, $a2, -1 ++; LA64-NEXT: masknez $a2, $a2, $a5 ++; LA64-NEXT: maskeqz $a5, $a1, $a5 ++; LA64-NEXT: or $a2, $a5, $a2 ++; LA64-NEXT: sltui $a5, $a4, 1 ++; LA64-NEXT: masknez $a2, $a2, $a5 ++; LA64-NEXT: maskeqz $a5, $a1, $a5 ++; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: .LBB6_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB6_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +@@ -323,17 +318,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a2, $a4, .LBB6_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 +-; LA64-NEXT: sc.w $a7, $a0, 0 +-; LA64-NEXT: beqz $a7, .LBB6_3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sc.w $a6, $a0, 0 ++; LA64-NEXT: beqz $a6, .LBB6_3 + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: move $a4, $a2 +-; LA64-NEXT: bne $a2, $a5, .LBB6_1 ++; LA64-NEXT: bne $a2, $a4, .LBB6_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 1dd3f39852d8..ebb09640e6c9 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -71,6 +71,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB2_3 +@@ -176,6 +177,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin + define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_monotonic: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB6_3 +@@ -285,9 +287,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB10_3 ++; LA64-NEXT: ll.w $a1, $a0, 0 ++; LA64-NEXT: bne $a1, $a3, .LBB10_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +@@ -296,7 +299,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: .LBB10_3: + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB10_4: +-; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: move $a0, $a1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 0 +@@ -404,6 +407,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + 
define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB14_3 +@@ -415,8 +419,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: .LBB14_3: + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB14_4: +-; LA64-NEXT: addi.w $a0, $a1, 0 +-; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -516,6 +519,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB18_3 +@@ -625,9 +629,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: ll.w $a1, $a0, 0 ++; LA64-NEXT: bne $a1, $a3, .LBB22_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +@@ -636,7 +641,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) + ; LA64-NEXT: .LBB22_3: + ; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB22_4: +-; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: move $a0, $a1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 0 +@@ -744,6 +749,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB26_3 +@@ -755,8 +761,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n + ; LA64-NEXT: .LBB26_3: + ; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB26_4: +-; LA64-NEXT: addi.w $a0, $a1, 0 +-; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 589360823b14..4d8160d70803 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB0_1 
Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB0_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB0_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB0_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB0_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 +@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB1_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB1_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB1_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB1_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 +@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB2_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB2_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -201,6 +201,7 @@ define float 
@float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -218,8 +219,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB2_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB2_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 +@@ -241,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -258,8 +259,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB3_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB3_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -277,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB3_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -294,8 +295,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB3_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB3_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 +@@ -694,6 +694,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB8_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -711,8 +712,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB8_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB8_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -729,6 +729,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB8_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -746,8 +747,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB8_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: 
Header=BB8_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB8_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 +@@ -768,6 +768,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB9_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -785,8 +786,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB9_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB9_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -803,6 +803,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB9_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -820,8 +821,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB9_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB9_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 +@@ -843,6 +843,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB10_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -860,8 +861,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB10_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB10_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -879,6 +879,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB10_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -896,8 +897,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB10_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB10_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 +@@ -919,6 +919,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s 
$a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB11_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -936,8 +937,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB11_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB11_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -955,6 +955,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB11_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -972,8 +973,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB11_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB11_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 +@@ -1372,6 +1372,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB16_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1389,8 +1390,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB16_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1407,6 +1407,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB16_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1424,8 +1425,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB16_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 +@@ -1446,6 +1446,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB17_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1463,8 +1464,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: 
bne $a3, $a1, .LBB17_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB17_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1481,6 +1481,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB17_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1498,8 +1499,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB17_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 +@@ -1521,6 +1521,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB18_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1538,8 +1539,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB18_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1557,6 +1557,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB18_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1574,8 +1575,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB18_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 +@@ -1597,6 +1597,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB19_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1614,8 +1615,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB19_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1633,6 +1633,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB19_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB19_1 
Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1650,8 +1651,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB19_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 +@@ -2074,6 +2074,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB24_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2091,8 +2092,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB24_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2109,6 +2109,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB24_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2126,8 +2127,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB24_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 +@@ -2148,6 +2148,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB25_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2165,8 +2166,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB25_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2183,6 +2183,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB25_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2200,8 +2201,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB25_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: 
ret + %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 +@@ -2223,6 +2223,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB26_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2240,8 +2241,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB26_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2259,6 +2259,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB26_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2276,8 +2277,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB26_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 +@@ -2299,6 +2299,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB27_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2316,8 +2317,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB27_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2335,6 +2335,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB27_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2352,8 +2353,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB27_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 +@@ -2752,6 +2752,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB32_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ 
-2769,8 +2770,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB32_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB32_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2787,6 +2787,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB32_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2804,8 +2805,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB32_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB32_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 +@@ -2826,6 +2826,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB33_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2843,8 +2844,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB33_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB33_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2861,6 +2861,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB33_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2878,8 +2879,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB33_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB33_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 +@@ -2901,6 +2901,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB34_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2918,8 +2919,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB34_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB34_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2937,6 +2937,7 @@ define float 
@float_fmin_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB34_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2954,8 +2955,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB34_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB34_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 +@@ -2977,6 +2977,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB35_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2994,8 +2995,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB35_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB35_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -3013,6 +3013,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB35_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -3030,8 +3031,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB35_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB35_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 +-- +2.20.1 + + +From 4713504e1495a3831ba1a47078de83db3a8a533c Mon Sep 17 00:00:00 2001 +From: Lu Weining <luweining@loongson.cn> +Date: Thu, 19 Oct 2023 10:16:13 +0800 +Subject: PATCH 7/7 Memory Call __clear_cache in InvalidateInstructionCache + on LoongArch (#67285) + +As the comments of `InvalidateInstructionCache`: Before the JIT can run +a block of code that has been emitted it must invalidate the instruction +cache on some platforms. I think it applies to LoongArch as LoongArch +has a weak memory-model. But I'm not able to write a test to demonstrate +this issue. Perhaps self-modifing code should be wrote? 
+ +(cherry picked from commit fb366581e7d67df7d9a98605fd65a7e7908451e7) +--- + llvm/lib/Support/Unix/Memory.inc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc +index 4c8f6b2ea7d3..69bd1164343d 100644 +--- a/llvm/lib/Support/Unix/Memory.inc ++++ b/llvm/lib/Support/Unix/Memory.inc +@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) { + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +-#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ ++#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ ++ defined(__mips__)) && \ + defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? + const char *Start = static_cast<const char *>(Addr); +-- +2.20.1 +
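For context on the hunk above: the `__GNUC__` branch it extends hands the written range to the compiler-provided `__clear_cache`, which is also exposed as the builtin `__builtin___clear_cache`. Below is a minimal sketch of the JIT pattern the commit message describes, assuming a POSIX system and a GCC-compatible compiler; the `emit_code` helper and the copied byte buffer are illustrative only, not LLVM's actual API, and W^X hardening and full error handling are omitted.

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical helper: copy pre-encoded machine code into an executable
// mapping. On weakly ordered targets (ARM, MIPS, and with this patch
// LoongArch) the instruction cache must be invalidated before jumping
// to the freshly written bytes.
void *emit_code(const uint8_t *code, size_t len) {
  void *buf = mmap(nullptr, len, PROT_READ | PROT_WRITE | PROT_EXEC,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (buf == MAP_FAILED)
    return nullptr;
  std::memcpy(buf, code, len);
  // The step this patch enables for LoongArch: invalidate the i-cache
  // over the written range via the GCC/Clang builtin.
  __builtin___clear_cache(static_cast<char *>(buf),
                          static_cast<char *>(buf) + len);
  return buf;
}

Without the invalidation step, the core may keep executing stale instructions from the i-cache, which is the JIT failure mode the commit message alludes to.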
View file
_service:tar_scm:0020-Update-llvm-lit-config-to-support-build_for_openeule.patch
Added
@@ -0,0 +1,30 @@ +From 5721be433ddee5f60d4a9434df43a023f1ec4c0e Mon Sep 17 00:00:00 2001 +From: wangqiang <wangqiang1@kylinos.cn> +Date: Sun, 28 Apr 2024 14:30:34 +0800 +Subject: PATCH Update llvm-lit config to support build_for_openeuler + +--- + llvm/cmake/modules/HandleLLVMOptions.cmake | 7 +++++++ + 1 files changed, 7 insertions(+) + +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index 76723be69..c6f5569af 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -97,6 +97,13 @@ if( LLVM_ENABLE_ASSERTIONS ) + set(LLVM_ENABLE_CLASSIC_FLANG 0) + endif() + ++option(BUILD_FOR_OPENEULER "Build support for openeuler" OFF) ++if(BUILD_FOR_OPENEULER) ++ set(BUILD_FOR_OPENEULER 1) ++else() ++ set(BUILD_FOR_OPENEULER 0) ++endif() ++ + if(LLVM_ENABLE_EXPENSIVE_CHECKS) + add_compile_definitions(EXPENSIVE_CHECKS) + +-- +2.33.0 +
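Usage note on the hunk above: `option(BUILD_FOR_OPENEULER ...)` makes the switch togglable at configure time, for example `cmake -DBUILD_FOR_OPENEULER=ON <source-dir>`, and the if/else then normalizes CMake's ON/OFF into a literal `1`/`0`, a form that is convenient to substitute into a lit site configuration as the `build_for_openeuler` macro named in the patch subject. The lit-side wiring itself is not part of the quoted hunk.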