[flang-commits] [clang] [flang] [llvm] [mlir] Add emitAtomicCompareExchangeBuiltin helper function (PR #101966)
Michael Kruse via flang-commits
flang-commits at lists.llvm.org
Thu Nov 14 13:48:33 PST 2024
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/101966
>From f166a90cba97a3c7cb979d0ec44b74f6e78e23d2 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Mon, 5 Aug 2024 13:35:40 +0200
Subject: [PATCH 01/17] Introduce emitAtomicCompareExchangeBuiltin
---
clang/include/clang/CodeGen/BackendUtil.h | 10 +
clang/include/clang/CodeGen/ModuleBuilder.h | 2 +
.../include/clang/Frontend/CompilerInstance.h | 5 +
clang/lib/CodeGen/BackendConsumer.h | 4 +-
clang/lib/CodeGen/BackendUtil.cpp | 8 +-
clang/lib/CodeGen/CGAtomic.cpp | 246 +-
clang/lib/CodeGen/CMakeLists.txt | 1 +
clang/lib/CodeGen/CodeGenAction.cpp | 11 +-
clang/lib/CodeGen/CodeGenFunction.cpp | 3 +
clang/lib/CodeGen/CodeGenFunction.h | 5 +
clang/lib/CodeGen/CodeGenModule.cpp | 15 +-
clang/lib/CodeGen/CodeGenModule.h | 9 +-
clang/lib/CodeGen/ModuleBuilder.cpp | 27 +-
.../CodeGen/ObjectFilePCHContainerWriter.cpp | 9 +-
clang/lib/Frontend/CMakeLists.txt | 1 +
clang/lib/Frontend/CompilerInstance.cpp | 32 +
.../PowerPC/builtins-ppc-xlcompat-cas.c | 42 +-
.../SystemZ/gnu-atomic-builtins-i128-16Al.c | 34 +-
.../SystemZ/gnu-atomic-builtins-i128-8Al.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i16.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i32.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i64.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i8.c | 34 +-
.../test/CodeGen/X86/x86-atomic-long_double.c | 30 +-
clang/test/CodeGen/atomic-ops.c | 432 +-
clang/test/CodeGen/atomic_ops.c | 364 +-
clang/test/CodeGen/c11atomics-ios.c | 207 +-
clang/test/CodeGen/c11atomics.c | 771 ++-
.../CodeGen/sanitize-atomic-int-overflow.c | 21 +-
clang/test/CodeGenCUDA/atomic-ops.cu | 2355 ++++++++-
clang/test/CodeGenOpenCL/atomic-ops.cl | 823 ++-
.../llvm/Analysis/TargetLibraryInfo.def | 33 +
llvm/include/llvm/IR/RuntimeLibcalls.def | 1 +
llvm/include/llvm/MC/TargetRegistry.h | 7 +-
llvm/include/llvm/Support/AtomicOrdering.h | 22 +
llvm/include/llvm/Target/TargetMachine.h | 4 +
.../llvm/Transforms/Utils/BuildBuiltins.h | 129 +
.../llvm/Transforms/Utils/BuildLibCalls.h | 19 +
llvm/lib/Analysis/TargetLibraryInfo.cpp | 10 +
llvm/lib/CodeGen/AtomicExpandPass.cpp | 72 +-
llvm/lib/CodeGen/LLVMTargetMachine.cpp | 4 +
llvm/lib/MC/TargetRegistry.cpp | 16 +
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 2 -
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 587 +++
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 74 +
llvm/lib/Transforms/Utils/CMakeLists.txt | 1 +
.../AArch64/Atomics/aarch64-atomicrmw-lse2.ll | 280 +-
.../Atomics/aarch64-atomicrmw-lse2_lse128.ll | 280 +-
.../aarch64-atomicrmw-outline_atomics.ll | 280 +-
.../AArch64/Atomics/aarch64-atomicrmw-rcpc.ll | 280 +-
.../Atomics/aarch64-atomicrmw-rcpc3.ll | 280 +-
.../Atomics/aarch64-atomicrmw-v8_1a.ll | 280 +-
.../AArch64/Atomics/aarch64-atomicrmw-v8a.ll | 280 +-
.../Atomics/aarch64_be-atomicrmw-lse2.ll | 280 +-
.../aarch64_be-atomicrmw-lse2_lse128.ll | 280 +-
.../aarch64_be-atomicrmw-outline_atomics.ll | 280 +-
.../Atomics/aarch64_be-atomicrmw-rcpc.ll | 280 +-
.../Atomics/aarch64_be-atomicrmw-rcpc3.ll | 280 +-
.../Atomics/aarch64_be-atomicrmw-v8_1a.ll | 280 +-
.../Atomics/aarch64_be-atomicrmw-v8a.ll | 280 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 4504 +++-------------
.../AMDGPU/global_atomics_scan_fmax.ll | 4390 +++-------------
.../AMDGPU/global_atomics_scan_fmin.ll | 4390 +++-------------
.../AMDGPU/global_atomics_scan_fsub.ll | 4508 +++--------------
.../ARM/atomicrmw_exclusive_monitor_ints.ll | 116 +-
llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 88 +-
.../LoongArch/ir-instruction/atomicrmw-fp.ll | 780 +--
llvm/test/CodeGen/PowerPC/all-atomics.ll | 758 +--
llvm/test/CodeGen/PowerPC/atomics-i128.ll | 948 ++--
llvm/test/CodeGen/PowerPC/atomics.ll | 27 +-
llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 900 +++-
llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll | 148 +-
llvm/test/CodeGen/RISCV/atomic-rmw.ll | 4048 ++++++++-------
llvm/test/CodeGen/RISCV/atomic-signext.ll | 1056 ++--
.../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 435 +-
llvm/test/CodeGen/RISCV/forced-atomics.ll | 1140 +++--
.../CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll | 22 +-
llvm/test/CodeGen/X86/atomic-idempotent.ll | 260 +-
llvm/test/CodeGen/X86/atomic-xor.ll | 14 +-
llvm/test/CodeGen/X86/atomic64.ll | 78 +-
llvm/test/CodeGen/X86/cmpxchg8b.ll | 8 +-
.../AMDGPU/expand-atomic-f64-agent.ll | 198 +-
.../AMDGPU/expand-atomic-f64-system.ll | 198 +-
.../AMDGPU/expand-atomic-fp128.ll | 108 +-
.../AtomicExpand/AMDGPU/expand-atomic-i128.ll | 171 +-
.../AMDGPU/expand-atomicrmw-fp-vector.ll | 264 +-
.../AtomicExpand/PowerPC/cmpxchg.ll | 22 +-
.../AtomicExpand/RISCV/atomicrmw-fp.ll | 58 +-
.../Transforms/AtomicExpand/SPARC/libcalls.ll | 283 +-
.../tools/llvm-tli-checker/ps4-tli-check.yaml | 32 +-
.../Analysis/TargetLibraryInfoTest.cpp | 7 +
91 files changed, 18111 insertions(+), 22386 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
create mode 100644 llvm/lib/Transforms/Utils/BuildBuiltins.cpp
diff --git a/clang/include/clang/CodeGen/BackendUtil.h b/clang/include/clang/CodeGen/BackendUtil.h
index fc8ed4f011f922..f38166f32afdb9 100644
--- a/clang/include/clang/CodeGen/BackendUtil.h
+++ b/clang/include/clang/CodeGen/BackendUtil.h
@@ -19,6 +19,7 @@ namespace llvm {
template <typename T> class IntrusiveRefCntPtr;
class Module;
class MemoryBufferRef;
+ class TargetOptions;
namespace vfs {
class FileSystem;
} // namespace vfs
@@ -54,6 +55,15 @@ namespace clang {
void EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts,
DiagnosticsEngine &Diags);
+
+ std::optional<llvm::CodeModel::Model>
+ getCodeModel(const CodeGenOptions &CodeGenOpts);
+
+ bool initTargetOptions(DiagnosticsEngine &Diags, llvm::TargetOptions &Options,
+ const CodeGenOptions &CodeGenOpts,
+ const clang::TargetOptions &TargetOpts,
+ const LangOptions &LangOpts,
+ const HeaderSearchOptions &HSOpts);
}
#endif
diff --git a/clang/include/clang/CodeGen/ModuleBuilder.h b/clang/include/clang/CodeGen/ModuleBuilder.h
index 59b9840d02e086..382e26353719a7 100644
--- a/clang/include/clang/CodeGen/ModuleBuilder.h
+++ b/clang/include/clang/CodeGen/ModuleBuilder.h
@@ -22,6 +22,7 @@ namespace llvm {
class LLVMContext;
class Module;
class StringRef;
+ class TargetMachine;
namespace vfs {
class FileSystem;
@@ -112,6 +113,7 @@ CodeGenerator *CreateLLVMCodeGen(DiagnosticsEngine &Diags,
const PreprocessorOptions &PreprocessorOpts,
const CodeGenOptions &CGO,
llvm::LLVMContext &C,
+ llvm::TargetMachine *TM = nullptr,
CoverageSourceInfo *CoverageInfo = nullptr);
} // end namespace clang
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 3464654284f199..9fd15ecda4dfbe 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -35,6 +35,7 @@ namespace llvm {
class raw_fd_ostream;
class Timer;
class TimerGroup;
+class TargetMachine;
}
namespace clang {
@@ -86,6 +87,8 @@ class CompilerInstance : public ModuleLoader {
/// The target being compiled for.
IntrusiveRefCntPtr<TargetInfo> Target;
+ std::unique_ptr<llvm::TargetMachine> TM;
+
/// Auxiliary Target info.
IntrusiveRefCntPtr<TargetInfo> AuxTarget;
@@ -357,6 +360,8 @@ class CompilerInstance : public ModuleLoader {
return Invocation->getTargetOpts();
}
+ llvm::TargetMachine *getTargetMachine() const { return TM.get(); }
+
/// @}
/// @name Diagnostics Engine
/// @{
diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h
index a023d29cbd1d73..d7eecc265f896c 100644
--- a/clang/lib/CodeGen/BackendConsumer.h
+++ b/clang/lib/CodeGen/BackendConsumer.h
@@ -17,6 +17,7 @@
namespace llvm {
class DiagnosticInfoDontCall;
+ class TargetMachine;
}
namespace clang {
@@ -78,6 +79,7 @@ class BackendConsumer : public ASTConsumer {
const std::string &InFile,
SmallVector<LinkModule, 4> LinkModules,
std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C,
+ llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo = nullptr);
// This constructor is used in installing an empty BackendConsumer
@@ -90,7 +92,7 @@ class BackendConsumer : public ASTConsumer {
const CodeGenOptions &CodeGenOpts,
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules,
- llvm::LLVMContext &C,
+ llvm::LLVMContext &C, llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo = nullptr);
llvm::Module *getModule() const;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index e765bbf637a661..03c1e5969099b6 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -289,8 +289,8 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
return false;
}
-static std::optional<llvm::CodeModel::Model>
-getCodeModel(const CodeGenOptions &CodeGenOpts) {
+std::optional<llvm::CodeModel::Model>
+clang::getCodeModel(const CodeGenOptions &CodeGenOpts) {
unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
.Case("tiny", llvm::CodeModel::Tiny)
.Case("small", llvm::CodeModel::Small)
@@ -321,7 +321,7 @@ static bool actionRequiresCodeGen(BackendAction Action) {
Action != Backend_EmitLL;
}
-static bool initTargetOptions(DiagnosticsEngine &Diags,
+bool clang::initTargetOptions(DiagnosticsEngine &Diags,
llvm::TargetOptions &Options,
const CodeGenOptions &CodeGenOpts,
const clang::TargetOptions &TargetOpts,
@@ -561,7 +561,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
- std::optional<CodeGenOptLevel> OptLevelOrNone =
+ std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel);
assert(OptLevelOrNone && "Invalid optimization level!");
CodeGenOptLevel OptLevel = *OptLevelOrNone;
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbe9569e50ef63..40c44df58ba5ef 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Transforms/Utils/BuildBuiltins.h"
using namespace clang;
using namespace CodeGen;
@@ -133,7 +134,9 @@ namespace {
QualType getValueType() const { return ValueTy; }
CharUnits getAtomicAlignment() const { return AtomicAlign; }
uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
+ uint64_t getAtomicSizeInBytes() const { return AtomicSizeInBits / 8; }
uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
+ uint64_t getValueSizeInBytes() const { return ValueSizeInBits / 8; }
TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
bool shouldUseLibcall() const { return UseLibcall; }
const LValue &getAtomicLValue() const { return LVal; }
@@ -374,130 +377,6 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
return true;
}
-static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
- Address Dest, Address Ptr,
- Address Val1, Address Val2,
- uint64_t Size,
- llvm::AtomicOrdering SuccessOrder,
- llvm::AtomicOrdering FailureOrder,
- llvm::SyncScope::ID Scope) {
- // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
- llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
- llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
-
- llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
- Pair->setVolatile(E->isVolatile());
- Pair->setWeak(IsWeak);
-
- // Cmp holds the result of the compare-exchange operation: true on success,
- // false on failure.
- llvm::Value *Old = CGF.Builder.CreateExtractValue(Pair, 0);
- llvm::Value *Cmp = CGF.Builder.CreateExtractValue(Pair, 1);
-
- // This basic block is used to hold the store instruction if the operation
- // failed.
- llvm::BasicBlock *StoreExpectedBB =
- CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);
-
- // This basic block is the exit point of the operation, we should end up
- // here regardless of whether or not the operation succeeded.
- llvm::BasicBlock *ContinueBB =
- CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
-
- // Update Expected if Expected isn't equal to Old, otherwise branch to the
- // exit point.
- CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);
-
- CGF.Builder.SetInsertPoint(StoreExpectedBB);
- // Update the memory at Expected with Old's value.
- CGF.Builder.CreateStore(Old, Val1);
- // Finally, branch to the exit point.
- CGF.Builder.CreateBr(ContinueBB);
-
- CGF.Builder.SetInsertPoint(ContinueBB);
- // Update the memory at Dest with Cmp's value.
- CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
-}
-
-/// Given an ordering required on success, emit all possible cmpxchg
-/// instructions to cope with the provided (but possibly only dynamically known)
-/// FailureOrder.
-static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
- bool IsWeak, Address Dest, Address Ptr,
- Address Val1, Address Val2,
- llvm::Value *FailureOrderVal,
- uint64_t Size,
- llvm::AtomicOrdering SuccessOrder,
- llvm::SyncScope::ID Scope) {
- llvm::AtomicOrdering FailureOrder;
- if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
- auto FOS = FO->getSExtValue();
- if (!llvm::isValidAtomicOrderingCABI(FOS))
- FailureOrder = llvm::AtomicOrdering::Monotonic;
- else
- switch ((llvm::AtomicOrderingCABI)FOS) {
- case llvm::AtomicOrderingCABI::relaxed:
- // 31.7.2.18: "The failure argument shall not be memory_order_release
- // nor memory_order_acq_rel". Fallback to monotonic.
- case llvm::AtomicOrderingCABI::release:
- case llvm::AtomicOrderingCABI::acq_rel:
- FailureOrder = llvm::AtomicOrdering::Monotonic;
- break;
- case llvm::AtomicOrderingCABI::consume:
- case llvm::AtomicOrderingCABI::acquire:
- FailureOrder = llvm::AtomicOrdering::Acquire;
- break;
- case llvm::AtomicOrderingCABI::seq_cst:
- FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent;
- break;
- }
- // Prior to c++17, "the failure argument shall be no stronger than the
- // success argument". This condition has been lifted and the only
- // precondition is 31.7.2.18. Effectively treat this as a DR and skip
- // language version checks.
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- FailureOrder, Scope);
- return;
- }
-
- // Create all the relevant BB's
- auto *MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
- auto *AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
- auto *SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
- auto *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);
-
- // MonotonicBB is arbitrarily chosen as the default case; in practice, this
- // doesn't matter unless someone is crazy enough to use something that
- // doesn't fold to a constant for the ordering.
- llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB);
- // Implemented as acquire, since it's the closest in LLVM.
- SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
- AcquireBB);
- SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
- AcquireBB);
- SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
- SeqCstBB);
-
- // Emit all the different atomics
- CGF.Builder.SetInsertPoint(MonotonicBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
- CGF.Builder.CreateBr(ContBB);
-
- CGF.Builder.SetInsertPoint(AcquireBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- llvm::AtomicOrdering::Acquire, Scope);
- CGF.Builder.CreateBr(ContBB);
-
- CGF.Builder.SetInsertPoint(SeqCstBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- llvm::AtomicOrdering::SequentiallyConsistent, Scope);
- CGF.Builder.CreateBr(ContBB);
-
- CGF.Builder.SetInsertPoint(ContBB);
-}
-
/// Duplicate the atomic min/max operation in conventional IR for the builtin
/// variants that return the new rather than the original value.
static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
@@ -531,53 +410,66 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
bool PostOpMinMax = false;
unsigned PostOp = 0;
+ bool IsWeakOp = false;
switch (E->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled!");
- case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
- case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
- case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
- emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
- return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
- emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ IsWeakOp = true;
+ [[fallthrough]];
+ case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: {
+ llvm::Value *LLVMPtr = Ptr.emitRawPointer(CGF);
+ llvm::Value *Expected = Val1.emitRawPointer(CGF);
+ llvm::Value *Desired = Val2.emitRawPointer(CGF);
+ llvm::Align Align = Ptr.getAlignment().getAsAlign();
+
+ SmallVector<std::pair<uint32_t, StringRef>> SupportedScopes;
+ StringRef DefaultScope;
+ if (std::unique_ptr<AtomicScopeModel> ScopeModel = E->getScopeModel()) {
+ for (unsigned S : ScopeModel->getRuntimeValues())
+ SupportedScopes.emplace_back(S, getAsString(ScopeModel->map(S)));
+ DefaultScope =
+ getAsString(ScopeModel->map(ScopeModel->getFallBackValue()));
+ }
+
+ llvm::emitAtomicCompareExchangeBuiltin(
+ LLVMPtr, Expected, Desired, IsWeakOp, E->isVolatile(), Order,
+ FailureOrder, Scope, Expected, Ptr.getElementType(), {}, {}, Align,
+ CGF.Builder, CGF.CGM.getDataLayout(), CGF.getTargetLibraryInfo(),
+ CGF.CGM.getTargetLowering(), SupportedScopes, DefaultScope);
return;
+ }
+
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__scoped_atomic_compare_exchange:
case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
- if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
- emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
- Val1, Val2, FailureOrder, Size, Order, Scope);
- } else {
- // Create all the relevant BB's
- llvm::BasicBlock *StrongBB =
- CGF.createBasicBlock("cmpxchg.strong", CGF.CurFn);
- llvm::BasicBlock *WeakBB = CGF.createBasicBlock("cmxchg.weak", CGF.CurFn);
- llvm::BasicBlock *ContBB =
- CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
-
- llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(IsWeak, WeakBB);
- SI->addCase(CGF.Builder.getInt1(false), StrongBB);
-
- CGF.Builder.SetInsertPoint(StrongBB);
- emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
- CGF.Builder.CreateBr(ContBB);
-
- CGF.Builder.SetInsertPoint(WeakBB);
- emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
- CGF.Builder.CreateBr(ContBB);
-
- CGF.Builder.SetInsertPoint(ContBB);
+ llvm::Value *LLVMPtr = Ptr.emitRawPointer(CGF);
+ llvm::Value *Expected = Val1.emitRawPointer(CGF);
+ llvm::Value *Desired = Val2.emitRawPointer(CGF);
+ llvm::Align Align = Ptr.getAlignment().getAsAlign();
+
+ SmallVector<std::pair<uint32_t, StringRef>> SupportedScopes;
+ StringRef DefaultScope;
+ if (std::unique_ptr<AtomicScopeModel> ScopeModel = E->getScopeModel()) {
+ for (unsigned S : ScopeModel->getRuntimeValues())
+ SupportedScopes.emplace_back(S, getAsString(ScopeModel->map(S)));
+ DefaultScope =
+ getAsString(ScopeModel->map(ScopeModel->getFallBackValue()));
}
+
+ llvm::Value *SuccessVal = llvm::emitAtomicCompareExchangeBuiltin(
+ LLVMPtr, Expected, Desired, IsWeak, E->isVolatile(), Order,
+ FailureOrder, Scope, Expected, Ptr.getElementType(), {}, {}, Align,
+ CGF.Builder, CGF.CGM.getDataLayout(), CGF.getTargetLibraryInfo(),
+ CGF.CGM.getTargetLowering(), SupportedScopes, DefaultScope);
+ CGF.EmitStoreOfScalar(SuccessVal, CGF.MakeAddrLValue(Dest, E->getType()));
return;
}
case AtomicExpr::AO__c11_atomic_load:
@@ -1679,31 +1571,23 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
llvm::AtomicOrdering Failure, bool IsWeak) {
- // Check whether we should use a library call.
- if (shouldUseLibcall()) {
- // Produce a source address.
- Address ExpectedAddr = materializeRValue(Expected);
- llvm::Value *ExpectedPtr = ExpectedAddr.emitRawPointer(CGF);
- llvm::Value *DesiredPtr = materializeRValue(Desired).emitRawPointer(CGF);
- auto *Res = EmitAtomicCompareExchangeLibcall(ExpectedPtr, DesiredPtr,
- Success, Failure);
- return std::make_pair(
- convertAtomicTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
- SourceLocation(), /*AsValue=*/false),
- Res);
- }
-
- // If we've got a scalar value of the right size, try to avoid going
- // through memory.
- auto *ExpectedVal = convertRValueToInt(Expected, /*CmpXchg=*/true);
- auto *DesiredVal = convertRValueToInt(Desired, /*CmpXchg=*/true);
- auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
- Failure, IsWeak);
+ llvm::Value *Ptr = getAtomicPointer();
+ Address ExpectedAddr = materializeRValue(Expected);
+ llvm::Value *ExpectedPtr = ExpectedAddr.emitRawPointer(CGF);
+ llvm::Value *DesiredPtr = materializeRValue(Desired).emitRawPointer(CGF);
+ Address PrevAddr = CreateTempAlloca();
+ llvm::Value *PrevPtr = PrevAddr.emitRawPointer(CGF);
+
+ llvm::Value *SuccessResult = llvm::emitAtomicCompareExchangeBuiltin(
+ Ptr, ExpectedPtr, DesiredPtr, IsWeak, LVal.isVolatileQualified(), Success,
+ Failure, PrevPtr, getAtomicAddress().getElementType(),
+ getValueSizeInBytes(), getAtomicSizeInBytes(),
+ getAtomicAlignment().getAsAlign(), CGF.Builder, CGF.CGM.getDataLayout(),
+ CGF.getTargetLibraryInfo(), CGF.CGM.getTargetLowering());
return std::make_pair(
- ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
- SourceLocation(), /*AsValue=*/false,
- /*CmpXchg=*/true),
- Res.second);
+ convertAtomicTempToRValue(PrevAddr, AggValueSlot::ignored(),
+ SourceLocation(), /*AsValue=*/false),
+ SuccessResult);
}
static void
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index deb7b27266d736..e2a21420d68bb9 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -27,6 +27,7 @@ set(LLVM_LINK_COMPONENTS
Passes
ProfileData
ScalarOpts
+ SelectionDAG
Support
Target
TargetParser
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index e87226e60297c0..4e8e77b7e38c81 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -116,14 +116,14 @@ BackendConsumer::BackendConsumer(
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
const std::string &InFile, SmallVector<LinkModule, 4> LinkModules,
std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo)
+ llvm::TargetMachine *TM, CoverageSourceInfo *CoverageInfo)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
- PPOpts, CodeGenOpts, C, CoverageInfo)),
+ PPOpts, CodeGenOpts, C, TM, CoverageInfo)),
LinkModules(std::move(LinkModules)) {
TimerIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
@@ -140,14 +140,14 @@ BackendConsumer::BackendConsumer(
const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules,
- LLVMContext &C, CoverageSourceInfo *CoverageInfo)
+ LLVMContext &C, llvm::TargetMachine *TM, CoverageSourceInfo *CoverageInfo)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
Context(nullptr), FS(VFS),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts,
- CodeGenOpts, C, CoverageInfo)),
+ CodeGenOpts, C, TM, CoverageInfo)),
LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
TimerIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
@@ -1019,7 +1019,8 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile),
- std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
+ std::move(LinkModules), std::move(OS), *VMContext, CI.getTargetMachine(),
+ CoverageInfo));
BEConsumer = Result.get();
// Enable generating macro debug info only when debug info is not disabled and
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index af201554898f31..d02eac9abb2d22 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -36,6 +36,7 @@
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -1519,6 +1520,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
if (checkIfFunctionMustProgress())
CurFn->addFnAttr(llvm::Attribute::MustProgress);
+ TLI.reset(new llvm::TargetLibraryInfo(CGM.getTargetLibraryInfoImpl(), Fn));
+
// Generate the body of the function.
PGO.assignRegionCounters(GD, CurFn);
if (isa<CXXDestructorDecl>(FD))
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1911fbac100c5c..516bbb032c3ca4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -285,6 +285,7 @@ class CodeGenFunction : public CodeGenTypeCache {
CodeGenModule &CGM; // Per-module state.
const TargetInfo &Target;
+ std::unique_ptr<llvm::TargetLibraryInfo> TLI;
// For EH/SEH outlined funclets, this field points to parent's CGF
CodeGenFunction *ParentCGF = nullptr;
@@ -2150,6 +2151,10 @@ class CodeGenFunction : public CodeGenTypeCache {
const LangOptions &getLangOpts() const { return CGM.getLangOpts(); }
+ const llvm::TargetLibraryInfo *getTargetLibraryInfo() const {
+ return TLI.get();
+ }
+
/// Returns a pointer to the function's exception object and selector slot,
/// which is assigned in every landing pad.
Address getExceptionSlot();
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 31f6632df9f27d..0146e281b67b00 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -333,18 +333,17 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return *TheTargetCodeGenInfo;
}
-CodeGenModule::CodeGenModule(ASTContext &C,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
- const HeaderSearchOptions &HSO,
- const PreprocessorOptions &PPO,
- const CodeGenOptions &CGO, llvm::Module &M,
- DiagnosticsEngine &diags,
- CoverageSourceInfo *CoverageInfo)
+CodeGenModule::CodeGenModule(
+ ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+ const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO,
+ const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags,
+ llvm::TargetLibraryInfoImpl &TLII, const llvm::TargetLowering *TL,
+ CoverageSourceInfo *CoverageInfo)
: Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
VMContext(M.getContext()), Types(*this), VTables(*this),
- SanitizerMD(new SanitizerMetadata(*this)) {
+ SanitizerMD(new SanitizerMetadata(*this)), TLII(TLII), TL(TL) {
// Initialize the type cache.
llvm::LLVMContext &LLVMContext = M.getContext();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 284bba823baeb4..d3ea293d49fa51 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -49,6 +49,7 @@ class DataLayout;
class FunctionType;
class LLVMContext;
class IndexedInstrProfReader;
+class TargetLowering;
namespace vfs {
class FileSystem;
@@ -307,6 +308,9 @@ class CodeGenModule : public CodeGenTypeCache {
const CodeGenOptions &CodeGenOpts;
unsigned NumAutoVarInit = 0;
llvm::Module &TheModule;
+ llvm::TargetLibraryInfoImpl &TLII;
+ const llvm::TargetLowering *TL;
+
DiagnosticsEngine &Diags;
const TargetInfo &Target;
std::unique_ptr<CGCXXABI> ABI;
@@ -632,7 +636,8 @@ class CodeGenModule : public CodeGenTypeCache {
const HeaderSearchOptions &headersearchopts,
const PreprocessorOptions &ppopts,
const CodeGenOptions &CodeGenOpts, llvm::Module &M,
- DiagnosticsEngine &Diags,
+ DiagnosticsEngine &Diags, llvm::TargetLibraryInfoImpl &TLII,
+ const llvm::TargetLowering *TL,
CoverageSourceInfo *CoverageInfo = nullptr);
~CodeGenModule();
@@ -771,6 +776,8 @@ class CodeGenModule : public CodeGenTypeCache {
const llvm::DataLayout &getDataLayout() const {
return TheModule.getDataLayout();
}
+ llvm::TargetLibraryInfoImpl &getTargetLibraryInfoImpl() const { return TLII; }
+ const llvm::TargetLowering *getTargetLowering() const { return TL; }
const TargetInfo &getTarget() const { return Target; }
const llvm::Triple &getTriple() const { return Target.getTriple(); }
bool supportsCOMDAT() const;
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index d4e0ab0339a8b0..06eff706301b47 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -20,10 +20,13 @@
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Target/TargetMachine.h"
#include <memory>
using namespace clang;
@@ -63,6 +66,9 @@ namespace {
protected:
std::unique_ptr<llvm::Module> M;
std::unique_ptr<CodeGen::CodeGenModule> Builder;
+ std::unique_ptr<llvm::TargetLibraryInfoImpl> TLII;
+ std::unique_ptr<llvm::TargetLowering> TL;
+ const llvm::TargetMachine *TM;
private:
SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs;
@@ -79,12 +85,12 @@ namespace {
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
const HeaderSearchOptions &HSO,
const PreprocessorOptions &PPO, const CodeGenOptions &CGO,
- llvm::LLVMContext &C,
+ llvm::LLVMContext &C, llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(diags), Ctx(nullptr), FS(std::move(FS)), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
CoverageInfo(CoverageInfo),
- M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)) {
+ M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)), TM(TM) {
C.setDiscardValueNames(CGO.DiscardValueNames);
}
@@ -151,7 +157,8 @@ namespace {
void Initialize(ASTContext &Context) override {
Ctx = &Context;
- M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
+ llvm::Triple TargetTriple = Ctx->getTargetInfo().getTriple();
+ M->setTargetTriple(TargetTriple.getTriple());
M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion();
if (!SDKVersion.empty())
@@ -161,9 +168,14 @@ namespace {
if (auto TVSDKVersion =
Ctx->getTargetInfo().getDarwinTargetVariantSDKVersion())
M->setDarwinTargetVariantSDKVersion(*TVSDKVersion);
- Builder.reset(new CodeGen::CodeGenModule(Context, FS, HeaderSearchOpts,
- PreprocessorOpts, CodeGenOpts,
- *M, Diags, CoverageInfo));
+
+ TLII.reset(
+ llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
+ if (TM)
+ TL = std::make_unique<llvm::TargetLowering>(*TM);
+ Builder.reset(new CodeGen::CodeGenModule(
+ Context, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M,
+ Diags, *TLII.get(), TL.get(), CoverageInfo));
for (auto &&Lib : CodeGenOpts.DependentLibraries)
Builder->AddDependentLib(Lib);
@@ -366,8 +378,9 @@ clang::CreateLLVMCodeGen(DiagnosticsEngine &Diags, llvm::StringRef ModuleName,
const HeaderSearchOptions &HeaderSearchOpts,
const PreprocessorOptions &PreprocessorOpts,
const CodeGenOptions &CGO, llvm::LLVMContext &C,
+ llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo) {
return new CodeGeneratorImpl(Diags, ModuleName, std::move(FS),
- HeaderSearchOpts, PreprocessorOpts, CGO, C,
+ HeaderSearchOpts, PreprocessorOpts, CGO, C, TM,
CoverageInfo);
}
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
index 3a1f745d9ed777..fcff475c3d683d 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
@@ -21,6 +21,7 @@
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/IR/Constants.h"
@@ -56,6 +57,7 @@ class PCHContainerGenerator : public ASTConsumer {
std::unique_ptr<CodeGen::CodeGenModule> Builder;
std::unique_ptr<raw_pwrite_stream> OS;
std::shared_ptr<PCHBuffer> Buffer;
+ std::unique_ptr<llvm::TargetLibraryInfoImpl> TLII;
/// Visit every type and emit debug info for it.
struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> {
@@ -177,8 +179,11 @@ class PCHContainerGenerator : public ASTConsumer {
VMContext.reset(new llvm::LLVMContext());
M.reset(new llvm::Module(MainFileName, *VMContext));
M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
- Builder.reset(new CodeGen::CodeGenModule(
- *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags));
+ llvm::Triple TargetTriple(M->getTargetTriple());
+ TLII.reset(llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
+ Builder.reset(new CodeGen::CodeGenModule(*Ctx, FS, HeaderSearchOpts,
+ PreprocessorOpts, CodeGenOpts, *M,
+ Diags, *TLII.get(), nullptr));
// Prepare CGDebugInfo to emit debug info for a clang module.
auto *DI = Builder->getModuleDebugInfo();
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index a9166672088459..f1ec1e52272d8a 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -51,6 +51,7 @@ add_clang_library(clangFrontend
clangAPINotes
clangAST
clangBasic
+ clangCodeGen
clangDriver
clangEdit
clangLex
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 6242b5a7d9fe39..d39775009a725e 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -19,6 +19,7 @@
#include "clang/Basic/Stack.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
+#include "clang/CodeGen/BackendUtil.h"
#include "clang/Config/config.h"
#include "clang/Frontend/ChainedDiagnosticConsumer.h"
#include "clang/Frontend/FrontendAction.h"
@@ -43,6 +44,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/Errc.h"
@@ -55,6 +57,7 @@
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Host.h"
#include <optional>
#include <time.h>
@@ -154,6 +157,35 @@ bool CompilerInstance::createTarget() {
if (auto *Aux = getAuxTarget())
getTarget().setAuxTarget(Aux);
+ llvm::Triple TargetTriple = getTarget().getTriple();
+ TargetOptions &TargetOpts = getTargetOpts();
+ std::string Error;
+ const llvm::Target *TheTarget =
+ llvm::TargetRegistry::lookupTarget(TargetTriple.getTriple(), Error);
+ if (TheTarget) {
+ CodeGenOptions &CodeGenOpts = getCodeGenOpts();
+ std::optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
+ std::string FeaturesStr =
+ llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
+ llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
+ std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
+ llvm::CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel);
+ assert(OptLevelOrNone && "Invalid optimization level!");
+ llvm::CodeGenOptLevel OptLevel = *OptLevelOrNone;
+
+ llvm::TargetOptions Options;
+ bool Scc =
+ initTargetOptions(getDiagnostics(), Options, CodeGenOpts, TargetOpts,
+ getLangOpts(), getHeaderSearchOpts());
+ if (Scc) {
+ TM.reset(TheTarget->createTargetMachine(TargetTriple.getTriple(),
+ TargetOpts.CPU, FeaturesStr,
+ Options, RM, CM, OptLevel));
+ if (TM)
+ TM->setLargeDataThreshold(CodeGenOpts.LargeDataThreshold);
+ }
+ }
+
return true;
}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
index 73ffe0694be6d2..dbd0e86ffbe18c 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
@@ -11,17 +11,26 @@
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[C_ADDR]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i32 [[TMP0]], i32 [[TMP1]] monotonic monotonic, align 4
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
-// CHECK-NEXT: ret i32 [[TMP5]]
+// CHECK-NEXT: store volatile i32 [[TMP0]], ptr [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: store volatile i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i32
+// CHECK-NEXT: ret i32 [[TMP3]]
//
int test_builtin_ppc_compare_and_swap(int a, int b, int c) {
return __compare_and_swap(&a, &b, c);
@@ -33,17 +42,26 @@ int test_builtin_ppc_compare_and_swap(int a, int b, int c) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store i64 [[B:%.*]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: store i64 [[C:%.*]], ptr [[C_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[C_ADDR]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i64 [[TMP0]], i64 [[TMP1]] monotonic monotonic, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
-// CHECK-NEXT: ret i32 [[TMP5]]
+// CHECK-NEXT: store volatile i64 [[TMP0]], ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: store volatile i64 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[ATOMIC_TEMP2]], align 8
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i32
+// CHECK-NEXT: ret i32 [[TMP3]]
//
int test_builtin_ppc_compare_and_swaplp(long a, long b, long c) {
return __compare_and_swaplp(&a, &b, c);
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
index e3db2063312d2b..039e0199d13fa9 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
@@ -82,16 +82,12 @@ __int128 f6() {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 16
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[TMP0]] seq_cst seq_cst, align 16
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 16
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7() {
return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
@@ -100,17 +96,13 @@ _Bool f7() {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 16
-// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 16
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 16
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr @Des, align 16
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 16
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 16
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8() {
return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
index 8759df7b19c638..39fdd60c8c6315 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
@@ -87,16 +87,12 @@ __int128 f6() {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 8
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[TMP0]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 8
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7() {
return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
@@ -105,17 +101,13 @@ _Bool f7() {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 8
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr @Des, align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 8
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8() {
return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
index 7c6a82f14197a1..4320c2736350c9 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
@@ -68,16 +68,12 @@ int16_t f6(int16_t *Ptr, int16_t *Val, int16_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[EXP:%.*]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[TMP0]], i16 [[DES:%.*]] seq_cst seq_cst, align 2
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
-// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
-// CHECK-NEXT: store i16 [[TMP3]], ptr [[EXP]], align 2
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP2]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i16, ptr [[EXP:%.*]], align 2
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[CMPXCHG_EXPECTED]], i16 [[DES:%.*]] seq_cst seq_cst, align 2
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i16 [[CMPXCHG_PREV]], ptr [[EXP]], align 2
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int16_t *Ptr, int16_t *Exp, int16_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -86,17 +82,13 @@ _Bool f7(int16_t *Ptr, int16_t *Exp, int16_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[EXP:%.*]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[DES:%.*]], align 2
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[TMP0]], i16 [[TMP1]] seq_cst seq_cst, align 2
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i16, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i16, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i16 [[TMP4]], ptr [[EXP]], align 2
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i16, ptr [[EXP:%.*]], align 2
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i16, ptr [[DES:%.*]], align 2
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[CMPXCHG_EXPECTED]], i16 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 2
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i16 [[CMPXCHG_PREV]], ptr [[EXP]], align 2
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int16_t *Ptr, int16_t *Exp, int16_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
index ba630e7c952e5b..8691228924f186 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
@@ -68,16 +68,12 @@ int32_t f6(int32_t *Ptr, int32_t *Val, int32_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[EXP:%.*]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[TMP0]], i32 [[DES:%.*]] seq_cst seq_cst, align 4
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
-// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[EXP]], align 4
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP2]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[EXP:%.*]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[CMPXCHG_EXPECTED]], i32 [[DES:%.*]] seq_cst seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[EXP]], align 4
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int32_t *Ptr, int32_t *Exp, int32_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -86,17 +82,13 @@ _Bool f7(int32_t *Ptr, int32_t *Exp, int32_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[EXP:%.*]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DES:%.*]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[TMP0]], i32 [[TMP1]] seq_cst seq_cst, align 4
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[EXP]], align 4
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[EXP:%.*]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DES:%.*]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[EXP]], align 4
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int32_t *Ptr, int32_t *Exp, int32_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
index 25c69ee8c54bf5..6d5a503df35d38 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
@@ -68,16 +68,12 @@ int64_t f6(int64_t *Ptr, int64_t *Val, int64_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[EXP:%.*]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[TMP0]], i64 [[DES:%.*]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
-// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[EXP]], align 8
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP2]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[EXP:%.*]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[CMPXCHG_EXPECTED]], i64 [[DES:%.*]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[EXP]], align 8
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int64_t *Ptr, int64_t *Exp, int64_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -86,17 +82,13 @@ _Bool f7(int64_t *Ptr, int64_t *Exp, int64_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[EXP:%.*]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DES:%.*]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[TMP0]], i64 [[TMP1]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[EXP]], align 8
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[EXP:%.*]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DES:%.*]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[EXP]], align 8
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int64_t *Ptr, int64_t *Exp, int64_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
index 1f4b455bc02610..9bbc61a7800068 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
@@ -68,16 +68,12 @@ int8_t f6(int8_t *Ptr, int8_t *Val, int8_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[EXP:%.*]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[TMP0]], i8 [[DES:%.*]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
-// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
-// CHECK-NEXT: store i8 [[TMP3]], ptr [[EXP]], align 1
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP2]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[EXP:%.*]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[CMPXCHG_EXPECTED]], i8 [[DES:%.*]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[EXP]], align 1
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int8_t *Ptr, int8_t *Exp, int8_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -86,17 +82,13 @@ _Bool f7(int8_t *Ptr, int8_t *Exp, int8_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[EXP:%.*]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[DES:%.*]], align 1
-// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[TMP0]], i8 [[TMP1]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP2]], 1
-// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
-// CHECK: cmpxchg.store_expected:
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[TMP2]], 0
-// CHECK-NEXT: store i8 [[TMP4]], ptr [[EXP]], align 1
-// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
-// CHECK: cmpxchg.continue:
-// CHECK-NEXT: ret i1 [[TMP3]]
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[EXP:%.*]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[DES:%.*]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[EXP]], align 1
+// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int8_t *Ptr, int8_t *Exp, int8_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index 2c3f381f13511e..c514ff76efbfa9 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X64 %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X64 %s
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X86 %s
// X64-LABEL: define dso_local x86_fp80 @testinc(
@@ -107,6 +107,7 @@ long double testdec(_Atomic long double *addr) {
// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
@@ -119,13 +120,15 @@ long double testdec(_Atomic long double *addr) {
// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
// X86-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
-// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
-// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
-// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 12, ptr [[TMP0]], ptr [[ATOMIC_TEMP1]], ptr [[ATOMIC_TEMP2]], i32 5, i32 5)
+// X86-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
+// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 12, i1 false)
+// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// X86-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// X86: [[ATOMIC_CONT]]:
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
-// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP4]], i32 noundef 5)
+// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP4]], align 4
// X86-NEXT: ret x86_fp80 [[TMP5]]
//
long double testcompassign(_Atomic long double *addr) {
@@ -267,7 +270,6 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: ret x86_fp80 [[TMP10]]
-//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_compassign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*]]:
@@ -276,6 +278,7 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
@@ -288,13 +291,15 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
// X86-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
-// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
-// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
-// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 12, ptr [[TMP0]], ptr [[ATOMIC_TEMP1]], ptr [[ATOMIC_TEMP2]], i32 5, i32 5)
+// X86-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
+// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 12, i1 false)
+// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// X86-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// X86: [[ATOMIC_CONT]]:
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
-// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP4]], i32 noundef 5)
+// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP4]], align 4
// X86-NEXT: ret x86_fp80 [[TMP5]]
//
long double test_volatile_compassign(volatile _Atomic long double *addr) {
@@ -319,7 +324,6 @@ long double test_volatile_compassign(volatile _Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
-//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_assign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
index b6060dcc540f90..7bb946cc509e44 100644
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 | FileCheck %s
// REQUIRES: x86-registered-target
@@ -13,165 +14,99 @@
// Basic IRGen tests for __c11_atomic_* and GNU __atomic_*
int fi1(_Atomic(int) *i) {
- // CHECK-LABEL: @fi1
- // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return __c11_atomic_load(i, memory_order_seq_cst);
}
int fi1a(int *i) {
- // CHECK-LABEL: @fi1a
- // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
int v;
__atomic_load(i, &v, memory_order_seq_cst);
return v;
}
int fi1b(int *i) {
- // CHECK-LABEL: @fi1b
- // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return __atomic_load_n(i, memory_order_seq_cst);
}
int fi1c(atomic_int *i) {
- // CHECK-LABEL: @fi1c
- // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return atomic_load(i);
}
void fi2(_Atomic(int) *i) {
- // CHECK-LABEL: @fi2
- // CHECK: store atomic i32 {{.*}} seq_cst, align 4
__c11_atomic_store(i, 1, memory_order_seq_cst);
}
void fi2a(int *i) {
- // CHECK-LABEL: @fi2a
- // CHECK: store atomic i32 {{.*}} seq_cst, align 4
int v = 1;
__atomic_store(i, &v, memory_order_seq_cst);
}
void fi2b(int *i) {
- // CHECK-LABEL: @fi2b
- // CHECK: store atomic i32 {{.*}} seq_cst, align 4
__atomic_store_n(i, 1, memory_order_seq_cst);
}
void fi2c(atomic_int *i) {
- // CHECK-LABEL: @fi2c
- // CHECK: store atomic i32 {{.*}} seq_cst, align 4
atomic_store(i, 1);
}
int fi3(_Atomic(int) *i) {
- // CHECK-LABEL: @fi3
- // CHECK: atomicrmw and {{.*}} seq_cst, align 4
- // CHECK-NOT: and
return __c11_atomic_fetch_and(i, 1, memory_order_seq_cst);
}
int fi3a(int *i) {
- // CHECK-LABEL: @fi3a
- // CHECK: atomicrmw xor {{.*}} seq_cst, align 4
- // CHECK-NOT: xor
return __atomic_fetch_xor(i, 1, memory_order_seq_cst);
}
int fi3b(int *i) {
- // CHECK-LABEL: @fi3b
- // CHECK: atomicrmw add {{.*}} seq_cst, align 4
- // CHECK: add
return __atomic_add_fetch(i, 1, memory_order_seq_cst);
}
int fi3c(int *i) {
- // CHECK-LABEL: @fi3c
- // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
- // CHECK-NOT: and
return __atomic_fetch_nand(i, 1, memory_order_seq_cst);
}
int fi3d(int *i) {
- // CHECK-LABEL: @fi3d
- // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
- // CHECK: and
- // CHECK: xor
return __atomic_nand_fetch(i, 1, memory_order_seq_cst);
}
int fi3e(atomic_int *i) {
- // CHECK-LABEL: @fi3e
- // CHECK: atomicrmw or {{.*}} seq_cst, align 4
- // CHECK-NOT: {{ or }}
return atomic_fetch_or(i, 1);
}
int fi3f(int *i) {
- // CHECK-LABEL: @fi3f
- // CHECK-NOT: store volatile
- // CHECK: atomicrmw or {{.*}} seq_cst, align 4
- // CHECK-NOT: {{ or }}
return __atomic_fetch_or(i, (short)1, memory_order_seq_cst);
}
_Bool fi4(_Atomic(int) *i) {
- // CHECK-LABEL: @fi4(
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
- // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
- // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
- // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
- // CHECK: store i32 [[OLD]]
int cmp = 0;
return __c11_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire);
}
_Bool fi4a(int *i) {
- // CHECK-LABEL: @fi4a
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
- // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
- // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
- // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
- // CHECK: store i32 [[OLD]]
int cmp = 0;
int desired = 1;
return __atomic_compare_exchange(i, &cmp, &desired, 0, memory_order_acquire, memory_order_acquire);
}
_Bool fi4b(int *i) {
- // CHECK-LABEL: @fi4b(
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg weak ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
- // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
- // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
- // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
- // CHECK: store i32 [[OLD]]
int cmp = 0;
return __atomic_compare_exchange_n(i, &cmp, 1, 1, memory_order_acquire, memory_order_acquire);
}
_Bool fi4c(atomic_int *i) {
- // CHECK-LABEL: @fi4c
- // CHECK: cmpxchg ptr {{.*}} seq_cst seq_cst, align 4
int cmp = 0;
return atomic_compare_exchange_strong(i, &cmp, 1);
}
#define _AS1 __attribute__((address_space(1)))
_Bool fi4d(_Atomic(int) *i, int _AS1 *ptr2) {
- // CHECK-LABEL: @fi4d(
- // CHECK: [[EXPECTED:%[.0-9A-Z_a-z]+]] = load i32, ptr addrspace(1) %{{[0-9]+}}
- // CHECK: cmpxchg ptr %{{[0-9]+}}, i32 [[EXPECTED]], i32 %{{[0-9]+}} acquire acquire, align 4
return __c11_atomic_compare_exchange_strong(i, ptr2, 1, memory_order_acquire, memory_order_acquire);
}
float ff1(_Atomic(float) *d) {
- // CHECK-LABEL: @ff1
- // CHECK: load atomic i32, ptr {{.*}} monotonic, align 4
return __c11_atomic_load(d, memory_order_relaxed);
}
void ff2(_Atomic(float) *d) {
- // CHECK-LABEL: @ff2
- // CHECK: store atomic i32 {{.*}} release, align 4
__c11_atomic_store(d, 1, memory_order_release);
}
@@ -184,135 +119,70 @@ struct S {
};
void implicit_store(_Atomic(struct S) *a, struct S s) {
- // CHECK-LABEL: @implicit_store(
- // CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} seq_cst, align 8
*a = s;
}
struct S implicit_load(_Atomic(struct S) *a) {
- // CHECK-LABEL: @implicit_load(
- // CHECK: load atomic i64, ptr %{{.*}} seq_cst, align 8
return *a;
}
struct S fd1(struct S *a) {
- // CHECK-LABEL: @fd1
- // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
- // CHECK: [[TMP1:%.*]] = load atomic i64, ptr {{%.*}} seq_cst, align 4
- // CHECK-NEXT: store i64 [[TMP1]], ptr [[RETVAL]], align 4
- // CHECK: ret
struct S ret;
__atomic_load(a, &ret, memory_order_seq_cst);
return ret;
}
void fd2(struct S *a, struct S *b) {
- // CHECK-LABEL: @fd2
- // CHECK: [[A_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: store ptr %a, ptr [[A_ADDR]], align 4
- // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
- // CHECK-NEXT: store atomic i64 [[LOAD_B]], ptr [[LOAD_A_PTR]] seq_cst, align 4
- // CHECK-NEXT: ret void
__atomic_store(a, b, memory_order_seq_cst);
}
void fd3(struct S *a, struct S *b, struct S *c) {
- // CHECK-LABEL: @fd3
- // CHECK: [[A_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: store ptr %a, ptr [[A_ADDR]], align 4
- // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
- // CHECK-NEXT: store ptr %c, ptr [[C_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
- // CHECK-NEXT: [[RESULT:%.*]] = atomicrmw xchg ptr [[LOAD_A_PTR]], i64 [[LOAD_B]] seq_cst, align 4
- // CHECK-NEXT: store i64 [[RESULT]], ptr [[LOAD_C_PTR]], align 4
__atomic_exchange(a, b, c, memory_order_seq_cst);
}
_Bool fd4(struct S *a, struct S *b, struct S *c) {
- // CHECK-LABEL: @fd4
- // CHECK: [[A_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
- // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
- // CHECK: store ptr %a, ptr [[A_ADDR]], align 4
- // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
- // CHECK-NEXT: store ptr %c, ptr [[C_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
- // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
- // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, ptr [[LOAD_C_PTR]], align 4
- // CHECK-NEXT: {{.*}} = cmpxchg weak ptr [[LOAD_A_PTR]], i64 [[LOAD_B]], i64 [[LOAD_C]] seq_cst seq_cst, align 4
return __atomic_compare_exchange(a, b, c, 1, 5, 5);
}
int* fp1(_Atomic(int*) *p) {
- // CHECK-LABEL: @fp1
- // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return __c11_atomic_load(p, memory_order_seq_cst);
}
int* fp2(_Atomic(int*) *p) {
- // CHECK-LABEL: @fp2
- // CHECK: store i32 4
- // CHECK: atomicrmw add {{.*}} monotonic, align 4
return __c11_atomic_fetch_add(p, 1, memory_order_relaxed);
}
int *fp2a(int **p) {
- // CHECK-LABEL: @fp2a
- // CHECK: store i32 4
- // CHECK: atomicrmw sub {{.*}} monotonic, align 4
// Note, the GNU builtins do not multiply by sizeof(T)!
return __atomic_fetch_sub(p, 4, memory_order_relaxed);
}
_Complex float fc(_Atomic(_Complex float) *c) {
- // CHECK-LABEL: @fc
- // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 8
return __c11_atomic_exchange(c, 2, memory_order_seq_cst);
}
typedef struct X { int x; } X;
X fs(_Atomic(X) *c) {
- // CHECK-LABEL: @fs
- // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 4
return __c11_atomic_exchange(c, (X){2}, memory_order_seq_cst);
}
X fsa(X *c, X *d) {
- // CHECK-LABEL: @fsa
- // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 4
X ret;
__atomic_exchange(c, d, &ret, memory_order_seq_cst);
return ret;
}
_Bool fsb(_Bool *c) {
- // CHECK-LABEL: @fsb
- // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 1
return __atomic_exchange_n(c, 1, memory_order_seq_cst);
}
char flag1;
volatile char flag2;
void test_and_set(void) {
- // CHECK: atomicrmw xchg ptr @flag1, i8 1 seq_cst, align 1
__atomic_test_and_set(&flag1, memory_order_seq_cst);
- // CHECK: atomicrmw volatile xchg ptr @flag2, i8 1 acquire, align 1
__atomic_test_and_set(&flag2, memory_order_acquire);
- // CHECK: store atomic volatile i8 0, ptr @flag2 release, align 1
__atomic_clear(&flag2, memory_order_release);
- // CHECK: store atomic i8 0, ptr @flag1 seq_cst, align 1
__atomic_clear(&flag1, memory_order_seq_cst);
}
@@ -326,25 +196,18 @@ struct Seventeen {
struct Incomplete;
int lock_free(struct Incomplete *incomplete) {
- // CHECK-LABEL: @lock_free
- // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 3, ptr noundef null)
__c11_atomic_is_lock_free(3);
- // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 16, ptr noundef {{.*}}@sixteen{{.*}})
__atomic_is_lock_free(16, &sixteen);
- // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 17, ptr noundef {{.*}}@seventeen{{.*}})
__atomic_is_lock_free(17, &seventeen);
- // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 4, {{.*}})
__atomic_is_lock_free(4, incomplete);
char cs[20];
- // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 4, {{.*}})
__atomic_is_lock_free(4, cs+1);
- // CHECK-NOT: call
__atomic_always_lock_free(3, 0);
__atomic_always_lock_free(16, 0);
__atomic_always_lock_free(17, 0);
@@ -354,7 +217,6 @@ int lock_free(struct Incomplete *incomplete) {
int n;
__atomic_is_lock_free(4, &n);
- // CHECK: ret i32 1
return __c11_atomic_is_lock_free(sizeof(_Atomic(int)));
}
@@ -374,229 +236,92 @@ struct foo bigThing;
_Atomic(struct foo) bigAtomic;
void structAtomicStore(void) {
- // CHECK-LABEL: @structAtomicStore
struct foo f = {0};
struct bar b = {0};
__atomic_store(&smallThing, &b, 5);
- // CHECK: call void @__atomic_store(i32 noundef 3, ptr noundef @smallThing
__atomic_store(&bigThing, &f, 5);
- // CHECK: call void @__atomic_store(i32 noundef 512, ptr noundef @bigThing
}
void structAtomicLoad(void) {
- // CHECK-LABEL: @structAtomicLoad
struct bar b;
__atomic_load(&smallThing, &b, 5);
- // CHECK: call void @__atomic_load(i32 noundef 3, ptr noundef @smallThing
struct foo f = {0};
__atomic_load(&bigThing, &f, 5);
- // CHECK: call void @__atomic_load(i32 noundef 512, ptr noundef @bigThing
}
struct foo structAtomicExchange(void) {
- // CHECK-LABEL: @structAtomicExchange
struct foo f = {0};
struct foo old;
__atomic_exchange(&f, &bigThing, &old, 5);
- // CHECK: call void @__atomic_exchange(i32 noundef 512, {{.*}}, ptr noundef @bigThing,
return __c11_atomic_exchange(&bigAtomic, f, 5);
- // CHECK: call void @__atomic_exchange(i32 noundef 512, ptr noundef @bigAtomic,
}
int structAtomicCmpExchange(void) {
- // CHECK-LABEL: @structAtomicCmpExchange
- // CHECK: %[[x_mem:.*]] = alloca i8
_Bool x = __atomic_compare_exchange(&smallThing, &thing1, &thing2, 1, 5, 5);
- // CHECK: %[[call1:.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 3, {{.*}} @smallThing{{.*}} @thing1{{.*}} @thing2
- // CHECK: %[[zext1:.*]] = zext i1 %[[call1]] to i8
- // CHECK: store i8 %[[zext1]], ptr %[[x_mem]], align 1
- // CHECK: %[[x:.*]] = load i8, ptr %[[x_mem]]
- // CHECK: %[[x_bool:.*]] = trunc i8 %[[x]] to i1
- // CHECK: %[[conv1:.*]] = zext i1 %[[x_bool]] to i32
struct foo f = {0};
struct foo g = {0};
g.big[12] = 12;
return x & __c11_atomic_compare_exchange_strong(&bigAtomic, &f, g, 5, 5);
- // CHECK: %[[call2:.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 512, ptr noundef @bigAtomic,
- // CHECK: %[[conv2:.*]] = zext i1 %[[call2]] to i32
- // CHECK: %[[and:.*]] = and i32 %[[conv1]], %[[conv2]]
- // CHECK: ret i32 %[[and]]
}
// Check that no atomic operations are used in any initialisation of _Atomic
// types.
_Atomic(int) atomic_init_i = 42;
-// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo(void)
{
- // CHECK-NOT: }
- // CHECK-NOT: atomic
- // CHECK: store
_Atomic(int) j = 12;
- // CHECK-NOT: }
- // CHECK-NOT: atomic
- // CHECK: store
__c11_atomic_init(&j, 42);
- // CHECK-NOT: atomic
- // CHECK: }
}
-// CHECK-LABEL: @failureOrder
void failureOrder(_Atomic(int) *ptr, int *ptr2) {
__c11_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed);
- // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acquire monotonic, align 4
__c11_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire);
- // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst acquire, align 4
// Unknown ordering: conservatively pick strongest valid option (for now!).
__atomic_compare_exchange(ptr2, ptr2, ptr2, 0, memory_order_acq_rel, *ptr2);
- // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acq_rel acquire, align 4
// Undefined behaviour: don't really care what that last ordering is so leave
// it out:
__atomic_compare_exchange_n(ptr2, ptr2, 43, 1, memory_order_seq_cst, 42);
- // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst {{.*}}, align 4
}
-// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(_Atomic(int) *ptr, int *ptr2, int success, int fail) {
__c11_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail);
- // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
- // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
-
- // CHECK: [[MONOTONIC]]
- // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[ACQUIRE]]
- // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[RELEASE]]
- // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[ACQREL]]
- // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[SEQCST]]
- // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
- // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[MONOTONIC_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} monotonic monotonic, align
- // CHECK: br
-
- // CHECK: [[MONOTONIC_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} monotonic acquire, align
- // CHECK: br
-
- // CHECK: [[MONOTONIC_SEQCST]]
- // CHECK: cmpxchg {{.*}} monotonic seq_cst, align
- // CHECK: br
-
- // CHECK: [[ACQUIRE_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} acquire monotonic, align
- // CHECK: br
-
- // CHECK: [[ACQUIRE_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} acquire acquire, align
- // CHECK: br
-
- // CHECK: [[ACQUIRE_SEQCST]]
- // CHECK: cmpxchg {{.*}} acquire seq_cst, align
- // CHECK: br
-
- // CHECK: [[RELEASE_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} release monotonic, align
- // CHECK: br
-
- // CHECK: [[RELEASE_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} release acquire, align
- // CHECK: br
-
- // CHECK: [[RELEASE_SEQCST]]
- // CHECK: cmpxchg {{.*}} release seq_cst, align
- // CHECK: br
-
- // CHECK: [[ACQREL_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} acq_rel monotonic, align
- // CHECK: br
-
- // CHECK: [[ACQREL_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} acq_rel acquire, align
- // CHECK: br
-
- // CHECK: [[ACQREL_SEQCST]]
- // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align
- // CHECK: br
-
- // CHECK: [[SEQCST_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} seq_cst monotonic, align
- // CHECK: br
-
- // CHECK: [[SEQCST_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} seq_cst acquire, align
- // CHECK: br
-
- // CHECK: [[SEQCST_SEQCST]]
- // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align
- // CHECK: br
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
}
void generalWeakness(int *ptr, int *ptr2, _Bool weak) {
__atomic_compare_exchange_n(ptr, ptr2, 42, weak, memory_order_seq_cst, memory_order_seq_cst);
- // CHECK: switch i1 {{.*}}, label %[[WEAK:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i1 false, label %[[STRONG:[0-9a-zA-Z._]+]]
- // CHECK: [[STRONG]]
- // CHECK-NOT: br
- // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align
- // CHECK: br
- // CHECK: [[WEAK]]
- // CHECK-NOT: br
- // CHECK: cmpxchg weak {{.*}} seq_cst seq_cst, align
- // CHECK: br
__atomic_compare_exchange_n(ptr, ptr2, 42, weak, memory_order_release, memory_order_acquire);
- // CHECK: switch i1 {{.*}}, label %[[WEAK:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i1 false, label %[[STRONG:[0-9a-zA-Z._]+]]
- // CHECK: [[STRONG]]
- // CHECK-NOT: br
- // CHECK: cmpxchg {{.*}} release acquire
- // CHECK: br
- // CHECK: [[WEAK]]
- // CHECK-NOT: br
- // CHECK: cmpxchg weak {{.*}} release acquire
- // CHECK: br
}
// Having checked the flow in the previous two cases, we'll trust clang to
@@ -604,185 +329,74 @@ void generalWeakness(int *ptr, int *ptr2, _Bool weak) {
void EMIT_ALL_THE_THINGS(int *ptr, int *ptr2, int new, _Bool weak, int success, int fail) {
__atomic_compare_exchange(ptr, ptr2, &new, weak, success, fail);
- // CHECK: = cmpxchg {{.*}} monotonic monotonic, align
- // CHECK: = cmpxchg {{.*}} monotonic acquire, align
- // CHECK: = cmpxchg {{.*}} monotonic seq_cst, align
- // CHECK: = cmpxchg weak {{.*}} monotonic monotonic, align
- // CHECK: = cmpxchg weak {{.*}} monotonic acquire, align
- // CHECK: = cmpxchg weak {{.*}} monotonic seq_cst, align
- // CHECK: = cmpxchg {{.*}} acquire monotonic, align
- // CHECK: = cmpxchg {{.*}} acquire acquire, align
- // CHECK: = cmpxchg {{.*}} acquire seq_cst, align
- // CHECK: = cmpxchg weak {{.*}} acquire monotonic, align
- // CHECK: = cmpxchg weak {{.*}} acquire acquire, align
- // CHECK: = cmpxchg weak {{.*}} acquire seq_cst, align
- // CHECK: = cmpxchg {{.*}} release monotonic, align
- // CHECK: = cmpxchg {{.*}} release acquire, align
- // CHECK: = cmpxchg {{.*}} release seq_cst, align
- // CHECK: = cmpxchg weak {{.*}} release monotonic, align
- // CHECK: = cmpxchg weak {{.*}} release acquire, align
- // CHECK: = cmpxchg weak {{.*}} release seq_cst, align
- // CHECK: = cmpxchg {{.*}} acq_rel monotonic, align
- // CHECK: = cmpxchg {{.*}} acq_rel acquire, align
- // CHECK: = cmpxchg {{.*}} acq_rel seq_cst, align
- // CHECK: = cmpxchg weak {{.*}} acq_rel monotonic, align
- // CHECK: = cmpxchg weak {{.*}} acq_rel acquire, align
- // CHECK: = cmpxchg weak {{.*}} acq_rel seq_cst, align
- // CHECK: = cmpxchg {{.*}} seq_cst monotonic, align
- // CHECK: = cmpxchg {{.*}} seq_cst acquire, align
- // CHECK: = cmpxchg {{.*}} seq_cst seq_cst, align
- // CHECK: = cmpxchg weak {{.*}} seq_cst monotonic, align
- // CHECK: = cmpxchg weak {{.*}} seq_cst acquire, align
- // CHECK: = cmpxchg weak {{.*}} seq_cst seq_cst, align
}
int PR21643(void) {
return __atomic_or_fetch((int __attribute__((address_space(257))) *)0x308, 1,
__ATOMIC_RELAXED);
- // CHECK: %[[atomictmp:.*]] = alloca i32, align 4
- // CHECK: %[[atomicdst:.*]] = alloca i32, align 4
- // CHECK: store i32 1, ptr %[[atomictmp]]
- // CHECK: %[[one:.*]] = load i32, ptr %[[atomictmp]], align 4
- // CHECK: %[[old:.*]] = atomicrmw or ptr addrspace(257) inttoptr (i32 776 to ptr addrspace(257)), i32 %[[one]] monotonic, align 4
- // CHECK: %[[new:.*]] = or i32 %[[old]], %[[one]]
- // CHECK: store i32 %[[new]], ptr %[[atomicdst]], align 4
- // CHECK: %[[ret:.*]] = load i32, ptr %[[atomicdst]], align 4
- // CHECK: ret i32 %[[ret]]
}
int PR17306_1(volatile _Atomic(int) *i) {
- // CHECK-LABEL: @PR17306_1
- // CHECK: %[[i_addr:.*]] = alloca ptr
- // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
- // CHECK-NEXT: store ptr %i, ptr %[[i_addr]]
- // CHECK-NEXT: %[[addr:.*]] = load ptr, ptr %[[i_addr]]
- // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] seq_cst, align 4
- // CHECK-NEXT: store i32 %[[res]], ptr %[[atomicdst]]
- // CHECK-NEXT: %[[retval:.*]] = load i32, ptr %[[atomicdst]]
- // CHECK-NEXT: ret i32 %[[retval]]
return __c11_atomic_load(i, memory_order_seq_cst);
}
int PR17306_2(volatile int *i, int value) {
- // CHECK-LABEL: @PR17306_2
- // CHECK: %[[i_addr:.*]] = alloca ptr
- // CHECK-NEXT: %[[value_addr:.*]] = alloca i32
- // CHECK-NEXT: %[[atomictmp:.*]] = alloca i32
- // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
- // CHECK-NEXT: store ptr %i, ptr %[[i_addr]]
- // CHECK-NEXT: store i32 %value, ptr %[[value_addr]]
- // CHECK-NEXT: %[[i_lval:.*]] = load ptr, ptr %[[i_addr]]
- // CHECK-NEXT: %[[value:.*]] = load i32, ptr %[[value_addr]]
- // CHECK-NEXT: store i32 %[[value]], ptr %[[atomictmp]]
- // CHECK-NEXT: %[[value_lval:.*]] = load i32, ptr %[[atomictmp]]
- // CHECK-NEXT: %[[old_val:.*]] = atomicrmw volatile add ptr %[[i_lval]], i32 %[[value_lval]] seq_cst, align 4
- // CHECK-NEXT: %[[new_val:.*]] = add i32 %[[old_val]], %[[value_lval]]
- // CHECK-NEXT: store i32 %[[new_val]], ptr %[[atomicdst]]
- // CHECK-NEXT: %[[retval:.*]] = load i32, ptr %[[atomicdst]]
- // CHECK-NEXT: ret i32 %[[retval]]
return __atomic_add_fetch(i, value, memory_order_seq_cst);
}
void test_underaligned(void) {
- // CHECK-LABEL: @test_underaligned
struct Underaligned { char c[8]; } underaligned_a, underaligned_b, underaligned_c;
- // CHECK: load atomic i64, {{.*}}, align 1
__atomic_load(&underaligned_a, &underaligned_b, memory_order_seq_cst);
- // CHECK: store atomic i64 {{.*}}, align 1
__atomic_store(&underaligned_a, &underaligned_b, memory_order_seq_cst);
- // CHECK: atomicrmw xchg ptr {{.*}}, align 1
__atomic_exchange(&underaligned_a, &underaligned_b, &underaligned_c, memory_order_seq_cst);
- // CHECK: cmpxchg weak ptr {{.*}}, align 1
__atomic_compare_exchange(&underaligned_a, &underaligned_b, &underaligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
__attribute__((aligned)) struct Underaligned aligned_a, aligned_b, aligned_c;
- // CHECK: load atomic i64, {{.*}}, align 16
__atomic_load(&aligned_a, &aligned_b, memory_order_seq_cst);
- // CHECK: store atomic i64 {{.*}}, align 16
__atomic_store(&aligned_a, &aligned_b, memory_order_seq_cst);
- // CHECK: atomicrmw xchg ptr {{.*}}, align 16
__atomic_exchange(&aligned_a, &aligned_b, &aligned_c, memory_order_seq_cst);
- // CHECK: cmpxchg weak ptr {{.*}}, align 16
__atomic_compare_exchange(&aligned_a, &aligned_b, &aligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
}
void test_c11_minmax(_Atomic(int) * si, _Atomic(unsigned) * ui, _Atomic(short) * ss, _Atomic(unsigned char) * uc, _Atomic(long long) * sll) {
- // CHECK-LABEL: @test_c11_minmax
- // CHECK: atomicrmw max ptr {{.*}} acquire, align 4
*si = __c11_atomic_fetch_max(si, 42, memory_order_acquire);
- // CHECK: atomicrmw min ptr {{.*}} acquire, align 4
*si = __c11_atomic_fetch_min(si, 42, memory_order_acquire);
- // CHECK: atomicrmw umax ptr {{.*}} acquire, align 4
*ui = __c11_atomic_fetch_max(ui, 42, memory_order_acquire);
- // CHECK: atomicrmw umin ptr {{.*}} acquire, align 4
*ui = __c11_atomic_fetch_min(ui, 42, memory_order_acquire);
- // CHECK: atomicrmw max ptr {{.*}} acquire, align 2
*ss = __c11_atomic_fetch_max(ss, 42, memory_order_acquire);
- // CHECK: atomicrmw min ptr {{.*}} acquire, align 2
*ss = __c11_atomic_fetch_min(ss, 42, memory_order_acquire);
- // CHECK: atomicrmw umax ptr {{.*}} acquire, align 1
*uc = __c11_atomic_fetch_max(uc, 42, memory_order_acquire);
- // CHECK: atomicrmw umin ptr {{.*}} acquire, align 1
*uc = __c11_atomic_fetch_min(uc, 42, memory_order_acquire);
- // CHECK: atomicrmw max ptr {{.*}} acquire, align 8
*sll = __c11_atomic_fetch_max(sll, 42, memory_order_acquire);
- // CHECK: atomicrmw min ptr {{.*}} acquire, align 8
*sll = __c11_atomic_fetch_min(sll, 42, memory_order_acquire);
}
void test_minmax_postop(int *si, unsigned *ui, unsigned short *us, signed char *sc, unsigned long long *ull) {
int val = 42;
- // CHECK-LABEL: @test_minmax_postop
- // CHECK: [[OLD:%.*]] = atomicrmw max ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
- // CHECK: [[TST:%.*]] = icmp sgt i32 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
- // CHECK: store i32 [[NEW]], ptr
*si = __atomic_max_fetch(si, 42, memory_order_release);
- // CHECK: [[OLD:%.*]] = atomicrmw min ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
- // CHECK: [[TST:%.*]] = icmp slt i32 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
- // CHECK: store i32 [[NEW]], ptr
*si = __atomic_min_fetch(si, 42, memory_order_release);
- // CHECK: [[OLD:%.*]] = atomicrmw umax ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
- // CHECK: [[TST:%.*]] = icmp ugt i32 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
- // CHECK: store i32 [[NEW]], ptr
*ui = __atomic_max_fetch(ui, 42, memory_order_release);
- // CHECK: [[OLD:%.*]] = atomicrmw umin ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
- // CHECK: [[TST:%.*]] = icmp ult i32 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
- // CHECK: store i32 [[NEW]], ptr
*ui = __atomic_min_fetch(ui, 42, memory_order_release);
- // CHECK: [[OLD:%.*]] = atomicrmw umin ptr [[PTR:%.*]], i16 [[RHS:%.*]] release, align 2
- // CHECK: [[TST:%.*]] = icmp ult i16 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i16 [[OLD]], i16 [[RHS]]
- // CHECK: store i16 [[NEW]], ptr
*us = __atomic_min_fetch(us, 42, memory_order_release);
- // CHECK: [[OLD:%.*]] = atomicrmw min ptr [[PTR:%.*]], i8 [[RHS:%.*]] release, align 1
- // CHECK: [[TST:%.*]] = icmp slt i8 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i8 [[OLD]], i8 [[RHS]]
- // CHECK: store i8 [[NEW]], ptr
*sc = __atomic_min_fetch(sc, 42, memory_order_release);
- // CHECK: [[OLD:%.*]] = atomicrmw umin ptr {{%.*}}, i64 [[RHS:%.*]] release, align 4
- // CHECK: [[TST:%.*]] = icmp ult i64 [[OLD]], [[RHS]]
- // CHECK: [[NEW:%.*]] = select i1 [[TST]], i64 [[OLD]], i64 [[RHS]]
- // CHECK: store i64 [[NEW]], ptr
*ull = __atomic_min_fetch(ull, 42, memory_order_release);
}
#endif
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/atomic_ops.c b/clang/test/CodeGen/atomic_ops.c
index fb34fc2a43836d..03d1f7909f57e3 100644
--- a/clang/test/CodeGen/atomic_ops.c
+++ b/clang/test/CodeGen/atomic_ops.c
@@ -1,116 +1,372 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple x86_64 -emit-llvm %s \
// RUN: -o - | FileCheck -check-prefixes=CHECK,NATIVE %s
// RUN: %clang_cc1 -triple riscv32 -target-feature -a -emit-llvm %s \
// RUN: -o - | FileCheck -check-prefixes=CHECK,LIBCALL %s
+// NATIVE-LABEL: define dso_local void @foo(
+// NATIVE-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// NATIVE-NEXT: [[ENTRY:.*]]:
+// NATIVE-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[I:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[J:%.*]] = alloca i16, align 2
+// NATIVE-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP7:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP19:%.*]] = alloca i16, align 2
+// NATIVE-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i16, align 2
+// NATIVE-NEXT: [[ATOMIC_TEMP21:%.*]] = alloca i16, align 2
+// NATIVE-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// NATIVE-NEXT: store i32 0, ptr [[I]], align 4
+// NATIVE-NEXT: store i16 0, ptr [[J]], align 2
+// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[I]] seq_cst, align 4
+// NATIVE-NEXT: br label %[[ATOMIC_OP:.*]]
+// NATIVE: [[ATOMIC_OP]]:
+// NATIVE-NEXT: [[TMP0:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[ENTRY]] ], [ [[TMP1:%.*]], %[[ATOMIC_OP]] ]
+// NATIVE-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 2
+// NATIVE-NEXT: store i32 [[TMP0]], ptr [[ATOMIC_TEMP]], align 4
+// NATIVE-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP1]], align 4
+// NATIVE-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// NATIVE-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
+// NATIVE-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
+// NATIVE-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// NATIVE-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
+// NATIVE-NEXT: [[TMP1]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
+// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// NATIVE: [[ATOMIC_CONT]]:
+// NATIVE-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic i32, ptr [[I]] seq_cst, align 4
+// NATIVE-NEXT: br label %[[ATOMIC_OP3:.*]]
+// NATIVE: [[ATOMIC_OP3]]:
+// NATIVE-NEXT: [[TMP2:%.*]] = phi i32 [ [[ATOMIC_LOAD4]], %[[ATOMIC_CONT]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP3]] ]
+// NATIVE-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP2]], 2
+// NATIVE-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP6]], align 4
+// NATIVE-NEXT: store i32 [[DIV]], ptr [[ATOMIC_TEMP7]], align 4
+// NATIVE-NEXT: [[CMPXCHG_EXPECTED9:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4
+// NATIVE-NEXT: [[CMPXCHG_DESIRED10:%.*]] = load i32, ptr [[ATOMIC_TEMP7]], align 4
+// NATIVE-NEXT: [[CMPXCHG_PAIR11:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED9]], i32 [[CMPXCHG_DESIRED10]] seq_cst seq_cst, align 4
+// NATIVE-NEXT: [[CMPXCHG_PREV12:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 0
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS13:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 1
+// NATIVE-NEXT: store i32 [[CMPXCHG_PREV12]], ptr [[ATOMIC_TEMP8]], align 4
+// NATIVE-NEXT: [[TMP3]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
+// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS13]], label %[[ATOMIC_CONT5:.*]], label %[[ATOMIC_OP3]]
+// NATIVE: [[ATOMIC_CONT5]]:
+// NATIVE-NEXT: [[TMP4:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// NATIVE-NEXT: [[ATOMIC_LOAD15:%.*]] = load atomic i16, ptr [[J]] seq_cst, align 2
+// NATIVE-NEXT: br label %[[ATOMIC_OP14:.*]]
+// NATIVE: [[ATOMIC_OP14]]:
+// NATIVE-NEXT: [[TMP5:%.*]] = phi i16 [ [[ATOMIC_LOAD15]], %[[ATOMIC_CONT5]] ], [ [[TMP6:%.*]], %[[ATOMIC_OP14]] ]
+// NATIVE-NEXT: [[CONV:%.*]] = zext i16 [[TMP5]] to i32
+// NATIVE-NEXT: [[DIV16:%.*]] = sdiv i32 [[CONV]], [[TMP4]]
+// NATIVE-NEXT: [[CONV17:%.*]] = trunc i32 [[DIV16]] to i16
+// NATIVE-NEXT: store i16 [[TMP5]], ptr [[ATOMIC_TEMP19]], align 2
+// NATIVE-NEXT: store i16 [[CONV17]], ptr [[ATOMIC_TEMP20]], align 2
+// NATIVE-NEXT: [[CMPXCHG_EXPECTED22:%.*]] = load i16, ptr [[ATOMIC_TEMP19]], align 2
+// NATIVE-NEXT: [[CMPXCHG_DESIRED23:%.*]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
+// NATIVE-NEXT: [[CMPXCHG_PAIR24:%.*]] = cmpxchg ptr [[J]], i16 [[CMPXCHG_EXPECTED22]], i16 [[CMPXCHG_DESIRED23]] seq_cst seq_cst, align 2
+// NATIVE-NEXT: [[CMPXCHG_PREV25:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 0
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS26:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 1
+// NATIVE-NEXT: store i16 [[CMPXCHG_PREV25]], ptr [[ATOMIC_TEMP21]], align 2
+// NATIVE-NEXT: [[TMP6]] = load i16, ptr [[ATOMIC_TEMP21]], align 2
+// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS26]], label %[[ATOMIC_CONT18:.*]], label %[[ATOMIC_OP14]]
+// NATIVE: [[ATOMIC_CONT18]]:
+// NATIVE-NEXT: ret void
+//
+// LIBCALL-LABEL: define dso_local void @foo(
+// LIBCALL-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// LIBCALL-NEXT: [[ENTRY:.*]]:
+// LIBCALL-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[I:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[J:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP7:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP9:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP21:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// LIBCALL-NEXT: store i32 0, ptr [[I]], align 4
+// LIBCALL-NEXT: store i16 0, ptr [[J]], align 2
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[I]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// LIBCALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// LIBCALL-NEXT: br label %[[ATOMIC_OP:.*]]
+// LIBCALL: [[ATOMIC_OP]]:
+// LIBCALL-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[ATOMIC_OP]] ]
+// LIBCALL-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 2
+// LIBCALL-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
+// LIBCALL-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP2]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
+// LIBCALL-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// LIBCALL-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP3]], align 4
+// LIBCALL-NEXT: [[TMP2]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
+// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// LIBCALL: [[ATOMIC_CONT]]:
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[I]], ptr noundef [[ATOMIC_TEMP5]], i32 noundef 5)
+// LIBCALL-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP5]], align 4
+// LIBCALL-NEXT: br label %[[ATOMIC_OP4:.*]]
+// LIBCALL: [[ATOMIC_OP4]]:
+// LIBCALL-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP3]], %[[ATOMIC_CONT]] ], [ [[TMP5:%.*]], %[[ATOMIC_OP4]] ]
+// LIBCALL-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2
+// LIBCALL-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP7]], align 4
+// LIBCALL-NEXT: store i32 [[DIV]], ptr [[ATOMIC_TEMP8]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_EXPECTED10:%.*]] = load i32, ptr [[ATOMIC_TEMP7]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_DESIRED11:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_PAIR12:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED10]], i32 [[CMPXCHG_DESIRED11]] seq_cst seq_cst, align 4
+// LIBCALL-NEXT: [[CMPXCHG_PREV13:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR12]], 0
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS14:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR12]], 1
+// LIBCALL-NEXT: store i32 [[CMPXCHG_PREV13]], ptr [[ATOMIC_TEMP9]], align 4
+// LIBCALL-NEXT: [[TMP5]] = load i32, ptr [[ATOMIC_TEMP9]], align 4
+// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS14]], label %[[ATOMIC_CONT6:.*]], label %[[ATOMIC_OP4]]
+// LIBCALL: [[ATOMIC_CONT6]]:
+// LIBCALL-NEXT: [[TMP6:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 2, ptr noundef [[J]], ptr noundef [[ATOMIC_TEMP16]], i32 noundef 5)
+// LIBCALL-NEXT: [[TMP7:%.*]] = load i16, ptr [[ATOMIC_TEMP16]], align 2
+// LIBCALL-NEXT: br label %[[ATOMIC_OP15:.*]]
+// LIBCALL: [[ATOMIC_OP15]]:
+// LIBCALL-NEXT: [[TMP8:%.*]] = phi i16 [ [[TMP7]], %[[ATOMIC_CONT6]] ], [ [[TMP9:%.*]], %[[ATOMIC_OP15]] ]
+// LIBCALL-NEXT: [[CONV:%.*]] = zext i16 [[TMP8]] to i32
+// LIBCALL-NEXT: [[DIV17:%.*]] = sdiv i32 [[CONV]], [[TMP6]]
+// LIBCALL-NEXT: [[CONV18:%.*]] = trunc i32 [[DIV17]] to i16
+// LIBCALL-NEXT: store i16 [[TMP8]], ptr [[ATOMIC_TEMP20]], align 2
+// LIBCALL-NEXT: store i16 [[CONV18]], ptr [[ATOMIC_TEMP21]], align 2
+// LIBCALL-NEXT: [[CMPXCHG_EXPECTED23:%.*]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
+// LIBCALL-NEXT: [[CMPXCHG_DESIRED24:%.*]] = load i16, ptr [[ATOMIC_TEMP21]], align 2
+// LIBCALL-NEXT: [[CMPXCHG_PAIR25:%.*]] = cmpxchg ptr [[J]], i16 [[CMPXCHG_EXPECTED23]], i16 [[CMPXCHG_DESIRED24]] seq_cst seq_cst, align 2
+// LIBCALL-NEXT: [[CMPXCHG_PREV26:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR25]], 0
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS27:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR25]], 1
+// LIBCALL-NEXT: store i16 [[CMPXCHG_PREV26]], ptr [[ATOMIC_TEMP22]], align 2
+// LIBCALL-NEXT: [[TMP9]] = load i16, ptr [[ATOMIC_TEMP22]], align 2
+// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS27]], label %[[ATOMIC_CONT19:.*]], label %[[ATOMIC_OP15]]
+// LIBCALL: [[ATOMIC_CONT19]]:
+// LIBCALL-NEXT: ret void
+//
void foo(int x)
{
_Atomic(int) i = 0;
_Atomic(short) j = 0;
// Check that multiply / divides on atomics produce a cmpxchg loop
i *= 2;
- // NATIVE: mul nsw i32
- // NATIVE: cmpxchg ptr {{.*}} seq_cst, align 4
- // LIBCALL: mul nsw i32
- // LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 4,
i /= 2;
- // NATIVE: sdiv i32
- // NATIVE: cmpxchg ptr {{.*}} seq_cst, align 4
- // LIBCALL: sdiv i32
- // LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 4,
j /= x;
- // NATIVE: sdiv i32
- // NATIVE: cmpxchg ptr {{.*}} seq_cst, align 2
- // LIBCALL: sdiv i32
- // LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 2,
}
-// LIBCALL: declare void @__atomic_load(i32, ptr, ptr, i32) [[LC_ATTRS:#[0-9]+]]
-// LIBCALL: declare i1 @__atomic_compare_exchange(i32, ptr, ptr, ptr, i32, i32) [[LC_ATTRS:#[0-9]+]]
extern _Atomic _Bool b;
+// NATIVE-LABEL: define dso_local zeroext i1 @bar(
+// NATIVE-SAME: ) #[[ATTR0]] {
+// NATIVE-NEXT: [[ENTRY:.*:]]
+// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr @b seq_cst, align 1
+// NATIVE-NEXT: [[LOADEDV:%.*]] = trunc i8 [[ATOMIC_LOAD]] to i1
+// NATIVE-NEXT: ret i1 [[LOADEDV]]
+//
+// LIBCALL-LABEL: define dso_local zeroext i1 @bar(
+// LIBCALL-SAME: ) #[[ATTR0]] {
+// LIBCALL-NEXT: [[ENTRY:.*:]]
+// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// LIBCALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// LIBCALL-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// LIBCALL-NEXT: ret i1 [[LOADEDV]]
+//
_Bool bar(void) {
-// NATIVE-LABEL: @bar
-// NATIVE: %[[load:.*]] = load atomic i8, ptr @b seq_cst, align 1
-// NATIVE: %[[tobool:.*]] = trunc i8 %[[load]] to i1
-// NATIVE: ret i1 %[[tobool]]
-// LIBCALL-LABEL: @bar
-// LIBCALL: call void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef %atomic-temp, i32 noundef 5)
-// LIBCALL: %[[load:.*]] = load i8, ptr %atomic-temp
-// LIBCALL: %[[tobool:.*]] = trunc i8 %[[load]] to i1
-// LIBCALL: ret i1 %[[tobool]]
return b;
}
extern _Atomic(_Complex int) x;
+// NATIVE-LABEL: define dso_local void @baz(
+// NATIVE-SAME: i32 noundef [[Y:%.*]]) #[[ATTR0]] {
+// NATIVE-NEXT: [[ENTRY:.*:]]
+// NATIVE-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { i32, i32 }, align 8
+// NATIVE-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { i32, i32 }, align 8
+// NATIVE-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
+// NATIVE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, ptr @x seq_cst, align 8
+// NATIVE-NEXT: store i64 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 8
+// NATIVE-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 0
+// NATIVE-NEXT: [[ATOMIC_TEMP_REAL:%.*]] = load i32, ptr [[ATOMIC_TEMP_REALP]], align 8
+// NATIVE-NEXT: [[ATOMIC_TEMP_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 1
+// NATIVE-NEXT: [[ATOMIC_TEMP_IMAG:%.*]] = load i32, ptr [[ATOMIC_TEMP_IMAGP]], align 4
+// NATIVE-NEXT: [[ADD_R:%.*]] = add i32 [[ATOMIC_TEMP_REAL]], [[TMP0]]
+// NATIVE-NEXT: [[ADD_I:%.*]] = add i32 [[ATOMIC_TEMP_IMAG]], 0
+// NATIVE-NEXT: [[ATOMIC_TEMP1_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 0
+// NATIVE-NEXT: [[ATOMIC_TEMP1_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 1
+// NATIVE-NEXT: store i32 [[ADD_R]], ptr [[ATOMIC_TEMP1_REALP]], align 8
+// NATIVE-NEXT: store i32 [[ADD_I]], ptr [[ATOMIC_TEMP1_IMAGP]], align 4
+// NATIVE-NEXT: [[TMP1:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
+// NATIVE-NEXT: store atomic i64 [[TMP1]], ptr @x seq_cst, align 8
+// NATIVE-NEXT: ret void
+//
+// LIBCALL-LABEL: define dso_local void @baz(
+// LIBCALL-SAME: i32 noundef [[Y:%.*]]) #[[ATTR0]] {
+// LIBCALL-NEXT: [[ENTRY:.*:]]
+// LIBCALL-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { i32, i32 }, align 8
+// LIBCALL-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { i32, i32 }, align 8
+// LIBCALL-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
+// LIBCALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 8, ptr noundef @x, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// LIBCALL-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 0
+// LIBCALL-NEXT: [[ATOMIC_TEMP_REAL:%.*]] = load i32, ptr [[ATOMIC_TEMP_REALP]], align 8
+// LIBCALL-NEXT: [[ATOMIC_TEMP_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 1
+// LIBCALL-NEXT: [[ATOMIC_TEMP_IMAG:%.*]] = load i32, ptr [[ATOMIC_TEMP_IMAGP]], align 4
+// LIBCALL-NEXT: [[ADD_R:%.*]] = add i32 [[ATOMIC_TEMP_REAL]], [[TMP0]]
+// LIBCALL-NEXT: [[ADD_I:%.*]] = add i32 [[ATOMIC_TEMP_IMAG]], 0
+// LIBCALL-NEXT: [[ATOMIC_TEMP1_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 0
+// LIBCALL-NEXT: [[ATOMIC_TEMP1_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 1
+// LIBCALL-NEXT: store i32 [[ADD_R]], ptr [[ATOMIC_TEMP1_REALP]], align 8
+// LIBCALL-NEXT: store i32 [[ADD_I]], ptr [[ATOMIC_TEMP1_IMAGP]], align 4
+// LIBCALL-NEXT: call void @__atomic_store(i32 noundef 8, ptr noundef @x, ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5)
+// LIBCALL-NEXT: ret void
+//
void baz(int y) {
-// NATIVE-LABEL: @baz
-// NATIVE: store atomic i64 {{.*}} seq_cst, align 8
-// LIBCALL-LABEL: @baz
-// LIBCALL: call void @__atomic_store
x += y;
}
-// LIBCALL: declare void @__atomic_store(i32, ptr, ptr, i32) [[LC_ATTRS:#[0-9]+]]
+// CHECK-LABEL: define dso_local i32 @compound_add(
+// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[IN_ADDR]], i32 5 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 5
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
_Atomic(int) compound_add(_Atomic(int) in) {
-// CHECK-LABEL: @compound_add
-// CHECK: [[OLD:%.*]] = atomicrmw add ptr {{.*}}, i32 5 seq_cst, align 4
-// CHECK: [[NEW:%.*]] = add i32 [[OLD]], 5
-// CHECK: ret i32 [[NEW]]
return (in += 5);
}
+// CHECK-LABEL: define dso_local i32 @compound_sub(
+// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr [[IN_ADDR]], i32 5 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 5
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
_Atomic(int) compound_sub(_Atomic(int) in) {
-// CHECK-LABEL: @compound_sub
-// CHECK: [[OLD:%.*]] = atomicrmw sub ptr {{.*}}, i32 5 seq_cst, align 4
-// CHECK: [[NEW:%.*]] = sub i32 [[OLD]], 5
-// CHECK: ret i32 [[NEW]]
return (in -= 5);
}
+// CHECK-LABEL: define dso_local i32 @compound_xor(
+// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[IN_ADDR]], i32 5 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP0]], 5
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
_Atomic(int) compound_xor(_Atomic(int) in) {
-// CHECK-LABEL: @compound_xor
-// CHECK: [[OLD:%.*]] = atomicrmw xor ptr {{.*}}, i32 5 seq_cst, align 4
-// CHECK: [[NEW:%.*]] = xor i32 [[OLD]], 5
-// CHECK: ret i32 [[NEW]]
return (in ^= 5);
}
+// CHECK-LABEL: define dso_local i32 @compound_or(
+// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[IN_ADDR]], i32 5 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], 5
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
_Atomic(int) compound_or(_Atomic(int) in) {
-// CHECK-LABEL: @compound_or
-// CHECK: [[OLD:%.*]] = atomicrmw or ptr {{.*}}, i32 5 seq_cst, align 4
-// CHECK: [[NEW:%.*]] = or i32 [[OLD]], 5
-// CHECK: ret i32 [[NEW]]
return (in |= 5);
}
+// CHECK-LABEL: define dso_local i32 @compound_and(
+// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[IN_ADDR]], i32 5 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 5
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
_Atomic(int) compound_and(_Atomic(int) in) {
-// CHECK-LABEL: @compound_and
-// CHECK: [[OLD:%.*]] = atomicrmw and ptr {{.*}}, i32 5 seq_cst, align 4
-// CHECK: [[NEW:%.*]] = and i32 [[OLD]], 5
-// CHECK: ret i32 [[NEW]]
return (in &= 5);
}
+// NATIVE-LABEL: define dso_local i32 @compound_mul(
+// NATIVE-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
+// NATIVE-NEXT: [[ENTRY:.*]]:
+// NATIVE-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
+// NATIVE-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[IN_ADDR]] seq_cst, align 4
+// NATIVE-NEXT: br label %[[ATOMIC_OP:.*]]
+// NATIVE: [[ATOMIC_OP]]:
+// NATIVE-NEXT: [[TMP0:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[ENTRY]] ], [ [[TMP1:%.*]], %[[ATOMIC_OP]] ]
+// NATIVE-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 5
+// NATIVE-NEXT: store i32 [[TMP0]], ptr [[ATOMIC_TEMP]], align 4
+// NATIVE-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP1]], align 4
+// NATIVE-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// NATIVE-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
+// NATIVE-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[IN_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
+// NATIVE-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// NATIVE-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
+// NATIVE-NEXT: [[TMP1]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
+// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// NATIVE: [[ATOMIC_CONT]]:
+// NATIVE-NEXT: ret i32 [[MUL]]
+//
+// LIBCALL-LABEL: define dso_local i32 @compound_mul(
+// LIBCALL-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
+// LIBCALL-NEXT: [[ENTRY:.*]]:
+// LIBCALL-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4
+// LIBCALL-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[IN_ADDR]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// LIBCALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// LIBCALL-NEXT: br label %[[ATOMIC_OP:.*]]
+// LIBCALL: [[ATOMIC_OP]]:
+// LIBCALL-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[ATOMIC_OP]] ]
+// LIBCALL-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 5
+// LIBCALL-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
+// LIBCALL-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP2]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[IN_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
+// LIBCALL-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// LIBCALL-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP3]], align 4
+// LIBCALL-NEXT: [[TMP2]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
+// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// LIBCALL: [[ATOMIC_CONT]]:
+// LIBCALL-NEXT: ret i32 [[MUL]]
+//
_Atomic(int) compound_mul(_Atomic(int) in) {
-// NATIVE-LABEL: @compound_mul
-// NATIVE: cmpxchg ptr {{%.*}}, i32 {{%.*}}, i32 [[NEW:%.*]] seq_cst seq_cst, align 4
-// NATIVE: ret i32 [[NEW]]
-// LIBCALL-LABEL: @compound_mul
-// LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 4,
return (in *= 5);
}
-// LIBCALL: [[LC_ATTRS]] = { nounwind willreturn }
diff --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c
index 811820b67fbdbf..f48313941e329f 100644
--- a/clang/test/CodeGen/c11atomics-ios.c
+++ b/clang/test/CodeGen/c11atomics-ios.c
@@ -1,264 +1,77 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-apple-ios -std=c11 | FileCheck %s
// There isn't really anything special about iOS; it just happens to
// only deploy on processors with native atomics support, so it's a good
// way to test those code-paths.
-// CHECK-LABEL: define{{.*}} void @testFloat(ptr
void testFloat(_Atomic(float) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr
-// CHECK-NEXT: [[X:%.*]] = alloca float
-// CHECK-NEXT: [[F:%.*]] = alloca float
-// CHECK-NEXT: store ptr {{%.*}}, ptr [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]], align 4
__c11_atomic_init(fp, 1.0f);
-// CHECK-NEXT: store float 2.000000e+00, ptr [[X]], align 4
_Atomic(float) x = 2.0f;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T2:%.*]] = load atomic float, ptr [[T0]] seq_cst, align 4
-// CHECK-NEXT: store float [[T2]], ptr [[F]]
float f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = load float, ptr [[F]], align 4
-// CHECK-NEXT: [[T1:%.*]] = load ptr, ptr [[FP]], align 4
-// CHECK-NEXT: store atomic float [[T0]], ptr [[T1]] seq_cst, align 4
*fp = f;
-// CHECK-NEXT: ret void
}
-// CHECK: define{{.*}} void @testComplexFloat(ptr
void testComplexFloat(_Atomic(_Complex float) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca [[CF:{ float, float }]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
-// CHECK-NEXT: store ptr
-
-// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 1
-// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]]
-// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
+
__c11_atomic_init(fp, 1.0f);
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 1
-// CHECK-NEXT: store float 2.000000e+00, ptr [[T0]]
-// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
_Atomic(_Complex float) x = 2.0f;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T2:%.*]] = load atomic i64, ptr [[T0]] seq_cst, align 8
-// CHECK-NEXT: store i64 [[T2]], ptr [[TMP0]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
-// CHECK-NEXT: store float [[R]], ptr [[T0]]
-// CHECK-NEXT: store float [[I]], ptr [[T1]]
_Complex float f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[DEST:%.*]] = load ptr, ptr [[FP]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 1
-// CHECK-NEXT: store float [[R]], ptr [[T0]]
-// CHECK-NEXT: store float [[I]], ptr [[T1]]
-// CHECK-NEXT: [[T1:%.*]] = load i64, ptr [[TMP1]], align 8
-// CHECK-NEXT: store atomic i64 [[T1]], ptr [[DEST]] seq_cst, align 8
*fp = f;
-// CHECK-NEXT: ret void
}
typedef struct { short x, y, z, w; } S;
-// CHECK: define{{.*}} void @testStruct(ptr
void testStruct(_Atomic(S) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca [[S:.*]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
-// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
-// CHECK-NEXT: store ptr
-
-// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 3
-// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
+
__c11_atomic_init(fp, (S){1,2,3,4});
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 3
-// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
_Atomic(S) x = (S){1,2,3,4};
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T2:%.*]] = load atomic i64, ptr [[T0]] seq_cst, align 8
-// CHECK-NEXT: store i64 [[T2]], ptr [[F]], align 2
S f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 2 [[F]], i32 8, i1 false)
-// CHECK-NEXT: [[T4:%.*]] = load i64, ptr [[TMP0]], align 8
-// CHECK-NEXT: store atomic i64 [[T4]], ptr [[T0]] seq_cst, align 8
*fp = f;
-// CHECK-NEXT: ret void
}
typedef struct { short x, y, z; } PS;
-// CHECK: define{{.*}} void @testPromotedStruct(ptr
void testPromotedStruct(_Atomic(PS) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca [[APS:.*]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
-// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
-// CHECK-NEXT: store ptr
-
-// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P]], i8 0, i64 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
+
__c11_atomic_init(fp, (PS){1,2,3});
-// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[X]], i8 0, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
_Atomic(PS) x = (PS){1,2,3};
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T2:%.*]] = load atomic i64, ptr [[T0]] seq_cst, align 8
-// CHECK-NEXT: store i64 [[T2]], ptr [[TMP0]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[F]], ptr align 8 [[T0]], i32 6, i1 false)
PS f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[TMP1]], i8 0, i32 8, i1 false)
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[T1]], ptr align 2 [[F]], i32 6, i1 false)
-// CHECK-NEXT: [[T5:%.*]] = load i64, ptr [[TMP1]], align 8
-// CHECK-NEXT: store atomic i64 [[T5]], ptr [[T0]] seq_cst, align 8
*fp = f;
-// CHECK-NEXT: ret void
}
PS test_promoted_load(_Atomic(PS) *addr) {
- // CHECK-LABEL: @test_promoted_load(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr)
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[VAL:%.*]] = load atomic i64, ptr [[ADDR]] seq_cst, align 8
- // CHECK: store i64 [[VAL]], ptr [[ATOMIC_RES]], align 8
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
return __c11_atomic_load(addr, 5);
}
void test_promoted_store(_Atomic(PS) *addr, PS *val) {
- // CHECK-LABEL: @test_promoted_store(ptr noundef %addr, ptr noundef %val)
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
- // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
- // CHECK: [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
- // CHECK: store atomic i64 [[VAL64]], ptr [[ADDR]] seq_cst, align 8
__c11_atomic_store(addr, *val, 5);
}
PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
- // CHECK-LABEL: @test_promoted_exchange(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr, ptr noundef %val)
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
- // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
- // CHECK: [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
- // CHECK: [[RES:%.*]] = atomicrmw xchg ptr [[ADDR]], i64 [[VAL64]] seq_cst, align 8
- // CHECK: store i64 [[RES]], ptr [[ATOMIC_RES]], align 8
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
return __c11_atomic_exchange(addr, *val, 5);
}
_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
- // CHECK: define{{.*}} zeroext i1 @test_promoted_cmpxchg(ptr noundef %addr, ptr noundef %desired, ptr noundef %new) #0 {
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[DESIRED_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NEW_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
- // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: [[RES_ADDR:%.*]] = alloca i8, align 1
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: store ptr %desired, ptr [[DESIRED_ARG]], align 4
- // CHECK: store ptr %new, ptr [[NEW_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[DESIRED:%.*]] = load ptr, ptr [[DESIRED_ARG]], align 4
- // CHECK: [[NEW:%.*]] = load ptr, ptr [[NEW_ARG]], align 4
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[NEW]], i32 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_DESIRED:%.*]], ptr align 2 [[DESIRED]], i64 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_NEW]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
- // CHECK: [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, ptr [[ATOMIC_DESIRED:%.*]], align 8
- // CHECK: [[ATOMIC_NEW_VAL64:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 8
- // CHECK: [[RES:%.*]] = cmpxchg ptr [[ADDR]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst, align 8
- // CHECK: [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
- // CHECK: [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
- // CHECK: br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}
-
- // CHECK: store i64 [[RES_VAL64]], ptr [[ATOMIC_DESIRED]], align 8
- // CHECK: br label {{%.*}}
-
- // CHECK: [[RES_BOOL8:%.*]] = zext i1 [[RES_BOOL]] to i8
- // CHECK: store i8 [[RES_BOOL8]], ptr [[RES_ADDR]], align 1
- // CHECK: [[RES_BOOL8:%.*]] = load i8, ptr [[RES_ADDR]], align 1
- // CHECK: [[RETVAL:%.*]] = trunc i8 [[RES_BOOL8]] to i1
- // CHECK: ret i1 [[RETVAL]]
+
+
return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
}
@@ -266,15 +79,11 @@ _Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
struct Empty {};
struct Empty testEmptyStructLoad(_Atomic(struct Empty)* empty) {
- // CHECK-LABEL: @testEmptyStructLoad(
- // CHECK-NOT: @__atomic_load
- // CHECK: load atomic i8, ptr %{{.*}} seq_cst, align 1
return *empty;
}
void testEmptyStructStore(_Atomic(struct Empty)* empty, struct Empty value) {
- // CHECK-LABEL: @testEmptyStructStore(
- // CHECK-NOT: @__atomic_store
- // CHECK: store atomic i8 %{{.*}}, ptr %{{.*}} seq_cst, align 1
*empty = value;
}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index 4da36ad4da0f92..3c625f226a9d00 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s
// Test that we are generating atomicrmw instructions, rather than
@@ -17,17 +18,13 @@ struct elem;
struct ptr {
struct elem *ptr;
};
-// CHECK-DAG: %struct.ptr = type { ptr }
struct elem {
_Atomic(struct ptr) link;
};
struct ptr object;
-// CHECK-DAG: @object ={{.*}} global %struct.ptr zeroinitializer
-// CHECK-DAG: @testStructGlobal ={{.*}} global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
-// CHECK-DAG: @testPromotedStructGlobal ={{.*}} global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
typedef int __attribute__((vector_size(16))) vector;
@@ -40,386 +37,664 @@ _Atomic(char*) p;
_Atomic(float) f;
_Atomic(vector) v;
-// CHECK: testinc
+// CHECK-LABEL: define dso_local arm_aapcscc void @testinc(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr @b, i8 1 seq_cst, align 1
+// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @i, i32 1 seq_cst, align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add ptr @l, i64 1 seq_cst, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw add ptr @s, i16 1 seq_cst, align 2
+// CHECK-NEXT: store atomic i8 1, ptr @b seq_cst, align 1
+// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw add ptr @i, i32 1 seq_cst, align 4
+// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw add ptr @l, i64 1 seq_cst, align 8
+// CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 1
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw add ptr @s, i16 1 seq_cst, align 2
+// CHECK-NEXT: [[TMP9:%.*]] = add i16 [[TMP8]], 1
+// CHECK-NEXT: ret void
+//
void testinc(void)
{
// Special case for suffix bool++, sets to true and returns the old value.
- // CHECK: atomicrmw xchg ptr @b, i8 1 seq_cst, align 1
b++;
- // CHECK: atomicrmw add ptr @i, i32 1 seq_cst, align 4
i++;
- // CHECK: atomicrmw add ptr @l, i64 1 seq_cst, align 8
l++;
- // CHECK: atomicrmw add ptr @s, i16 1 seq_cst, align 2
s++;
// Prefix increment
// Special case for bool: set to true and return true
- // CHECK: store atomic i8 1, ptr @b seq_cst, align 1
++b;
// Currently, we have no variant of atomicrmw that returns the new value, so
// we have to generate an atomic add, which returns the old value, and then a
// non-atomic add.
- // CHECK: atomicrmw add ptr @i, i32 1 seq_cst, align 4
- // CHECK: add i32
++i;
- // CHECK: atomicrmw add ptr @l, i64 1 seq_cst, align 8
- // CHECK: add i64
++l;
- // CHECK: atomicrmw add ptr @s, i16 1 seq_cst, align 2
- // CHECK: add i16
++s;
}
-// CHECK: testdec
+// CHECK-LABEL: define dso_local arm_aapcscc void @testdec(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP13:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i8, align 1
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
+// CHECK: [[ATOMIC_OP]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV5:%.*]], %[[ATOMIC_OP]] ]
+// CHECK-NEXT: [[DEC:%.*]] = add i8 [[TMP1]], -1
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 1
+// CHECK-NEXT: store i8 [[DEC]], ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP1]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[LOADEDV4:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV5]] = zext i1 [[LOADEDV4]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// CHECK: [[ATOMIC_CONT]]:
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw sub ptr @i, i32 1 seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw sub ptr @l, i64 1 seq_cst, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw sub ptr @s, i16 1 seq_cst, align 2
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP6]], i32 noundef 5)
+// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ATOMIC_TEMP6]], align 1
+// CHECK-NEXT: [[LOADEDV7:%.*]] = trunc i8 [[TMP6]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV7]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP8:.*]]
+// CHECK: [[ATOMIC_OP8]]:
+// CHECK-NEXT: [[TMP7:%.*]] = phi i8 [ [[STOREDV9]], %[[ATOMIC_CONT]] ], [ [[STOREDV21:%.*]], %[[ATOMIC_OP8]] ]
+// CHECK-NEXT: [[DEC10:%.*]] = add i8 [[TMP7]], -1
+// CHECK-NEXT: store i8 [[TMP7]], ptr [[ATOMIC_TEMP12]], align 1
+// CHECK-NEXT: store i8 [[DEC10]], ptr [[ATOMIC_TEMP13]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED15:%.*]] = load i8, ptr [[ATOMIC_TEMP12]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED16:%.*]] = load i8, ptr [[ATOMIC_TEMP13]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR17:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED15]], i8 [[CMPXCHG_DESIRED16]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV18:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR17]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS19:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR17]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV18]], ptr [[ATOMIC_TEMP14]], align 1
+// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ATOMIC_TEMP14]], align 1
+// CHECK-NEXT: [[LOADEDV20:%.*]] = trunc i8 [[TMP8]] to i1
+// CHECK-NEXT: [[STOREDV21]] = zext i1 [[LOADEDV20]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS19]], label %[[ATOMIC_CONT11:.*]], label %[[ATOMIC_OP8]]
+// CHECK: [[ATOMIC_CONT11]]:
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw sub ptr @i, i32 1 seq_cst, align 4
+// CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
+// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw sub ptr @l, i64 1 seq_cst, align 8
+// CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP11]], 1
+// CHECK-NEXT: [[TMP13:%.*]] = atomicrmw sub ptr @s, i16 1 seq_cst, align 2
+// CHECK-NEXT: [[TMP14:%.*]] = sub i16 [[TMP13]], 1
+// CHECK-NEXT: ret void
+//
void testdec(void)
{
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
b--;
- // CHECK: atomicrmw sub ptr @i, i32 1 seq_cst, align 4
i--;
- // CHECK: atomicrmw sub ptr @l, i64 1 seq_cst, align 8
l--;
- // CHECK: atomicrmw sub ptr @s, i16 1 seq_cst, align 2
s--;
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
--b;
- // CHECK: atomicrmw sub ptr @i, i32 1 seq_cst, align 4
- // CHECK: sub i32
--i;
- // CHECK: atomicrmw sub ptr @l, i64 1 seq_cst, align 8
- // CHECK: sub i64
--l;
- // CHECK: atomicrmw sub ptr @s, i16 1 seq_cst, align 2
- // CHECK: sub i16
--s;
}
-// CHECK: testaddeq
+// CHECK-LABEL: define dso_local arm_aapcscc void @testaddeq(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
+// CHECK: [[ATOMIC_OP]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 42
+// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// CHECK: [[ATOMIC_CONT]]:
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw add ptr @i, i32 42 seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 42
+// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw add ptr @l, i64 42 seq_cst, align 8
+// CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 42
+// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw add ptr @s, i16 42 seq_cst, align 2
+// CHECK-NEXT: [[TMP8:%.*]] = add i16 [[TMP7]], 42
+// CHECK-NEXT: ret void
+//
void testaddeq(void)
{
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
- // CHECK: atomicrmw add ptr @i, i32 42 seq_cst, align 4
- // CHECK: atomicrmw add ptr @l, i64 42 seq_cst, align 8
- // CHECK: atomicrmw add ptr @s, i16 42 seq_cst, align 2
b += 42;
i += 42;
l += 42;
s += 42;
}
-// CHECK: testsubeq
+// CHECK-LABEL: define dso_local arm_aapcscc void @testsubeq(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
+// CHECK: [[ATOMIC_OP]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], 42
+// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[SUB]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// CHECK: [[ATOMIC_CONT]]:
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw sub ptr @i, i32 42 seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 42
+// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw sub ptr @l, i64 42 seq_cst, align 8
+// CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 42
+// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr @s, i16 42 seq_cst, align 2
+// CHECK-NEXT: [[TMP8:%.*]] = sub i16 [[TMP7]], 42
+// CHECK-NEXT: ret void
+//
void testsubeq(void)
{
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
- // CHECK: atomicrmw sub ptr @i, i32 42 seq_cst, align 4
- // CHECK: atomicrmw sub ptr @l, i64 42 seq_cst, align 8
- // CHECK: atomicrmw sub ptr @s, i16 42 seq_cst, align 2
b -= 42;
i -= 42;
l -= 42;
s -= 42;
}
-// CHECK: testxoreq
+// CHECK-LABEL: define dso_local arm_aapcscc void @testxoreq(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
+// CHECK: [[ATOMIC_OP]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 42
+// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// CHECK: [[ATOMIC_CONT]]:
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xor ptr @i, i32 42 seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP3]], 42
+// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xor ptr @l, i64 42 seq_cst, align 8
+// CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 42
+// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw xor ptr @s, i16 42 seq_cst, align 2
+// CHECK-NEXT: [[TMP8:%.*]] = xor i16 [[TMP7]], 42
+// CHECK-NEXT: ret void
+//
void testxoreq(void)
{
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
- // CHECK: atomicrmw xor ptr @i, i32 42 seq_cst, align 4
- // CHECK: atomicrmw xor ptr @l, i64 42 seq_cst, align 8
- // CHECK: atomicrmw xor ptr @s, i16 42 seq_cst, align 2
b ^= 42;
i ^= 42;
l ^= 42;
s ^= 42;
}
-// CHECK: testoreq
+// CHECK-LABEL: define dso_local arm_aapcscc void @testoreq(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
+// CHECK: [[ATOMIC_OP]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT: [[OR:%.*]] = or i32 [[CONV]], 42
+// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[OR]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// CHECK: [[ATOMIC_CONT]]:
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw or ptr @i, i32 42 seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], 42
+// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw or ptr @l, i64 42 seq_cst, align 8
+// CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 42
+// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw or ptr @s, i16 42 seq_cst, align 2
+// CHECK-NEXT: [[TMP8:%.*]] = or i16 [[TMP7]], 42
+// CHECK-NEXT: ret void
+//
void testoreq(void)
{
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
- // CHECK: atomicrmw or ptr @i, i32 42 seq_cst, align 4
- // CHECK: atomicrmw or ptr @l, i64 42 seq_cst, align 8
- // CHECK: atomicrmw or ptr @s, i16 42 seq_cst, align 2
b |= 42;
i |= 42;
l |= 42;
s |= 42;
}
-// CHECK: testandeq
+// CHECK-LABEL: define dso_local arm_aapcscc void @testandeq(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
+// CHECK: [[ATOMIC_OP]]:
+// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV]], 42
+// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[AND]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// CHECK: [[ATOMIC_CONT]]:
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw and ptr @i, i32 42 seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 42
+// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw and ptr @l, i64 42 seq_cst, align 8
+// CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 42
+// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw and ptr @s, i16 42 seq_cst, align 2
+// CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 42
+// CHECK-NEXT: ret void
+//
void testandeq(void)
{
- // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
- // CHECK: atomicrmw and ptr @i, i32 42 seq_cst, align 4
- // CHECK: atomicrmw and ptr @l, i64 42 seq_cst, align 8
- // CHECK: atomicrmw and ptr @s, i16 42 seq_cst, align 2
b &= 42;
i &= 42;
l &= 42;
s &= 42;
}
-// CHECK-LABEL: define{{.*}} arm_aapcscc void @testFloat(ptr
+// CHECK-LABEL: define dso_local arm_aapcscc void @testFloat(
+// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[F:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca float, align 4
+// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: store float 1.000000e+00, ptr [[TMP0]], align 4
+// CHECK-NEXT: store float 2.000000e+00, ptr [[X]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 4, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: store float [[TMP2]], ptr [[F]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: store float [[TMP3]], ptr [[ATOMIC_TEMP1]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 4, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5)
+// CHECK-NEXT: ret void
+//
void testFloat(_Atomic(float) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr
-// CHECK-NEXT: [[X:%.*]] = alloca float
-// CHECK-NEXT: [[F:%.*]] = alloca float
-// CHECK-NEXT: [[TMP0:%.*]] = alloca float
-// CHECK-NEXT: [[TMP1:%.*]] = alloca float
-// CHECK-NEXT: store ptr {{%.*}}, ptr [[FP]]
-
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]], align 4
+
__c11_atomic_init(fp, 1.0f);
-// CHECK-NEXT: store float 2.000000e+00, ptr [[X]], align 4
_Atomic(float) x = 2.0f;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 4, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
-// CHECK-NEXT: [[T3:%.*]] = load float, ptr [[TMP0]], align 4
-// CHECK-NEXT: store float [[T3]], ptr [[F]]
float f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = load float, ptr [[F]], align 4
-// CHECK-NEXT: [[T1:%.*]] = load ptr, ptr [[FP]], align 4
-// CHECK-NEXT: store float [[T0]], ptr [[TMP1]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 4, ptr noundef [[T1]], ptr noundef [[TMP1]], i32 noundef 5)
*fp = f;
-// CHECK-NEXT: ret void
}
-// CHECK: define{{.*}} arm_aapcscc void @testComplexFloat(ptr
+// CHECK-LABEL: define dso_local arm_aapcscc void @testComplexFloat(
+// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca { float, float }, align 8
+// CHECK-NEXT: [[F:%.*]] = alloca { float, float }, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { float, float }, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca { float, float }, align 8
+// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: store float 1.000000e+00, ptr [[DOTREALP]], align 8
+// CHECK-NEXT: store float 0.000000e+00, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[X_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: [[X_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store float 2.000000e+00, ptr [[X_REALP]], align 8
+// CHECK-NEXT: store float 0.000000e+00, ptr [[X_IMAGP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[ATOMIC_TEMP_REAL:%.*]] = load float, ptr [[ATOMIC_TEMP_REALP]], align 8
+// CHECK-NEXT: [[ATOMIC_TEMP_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP]], i32 0, i32 1
+// CHECK-NEXT: [[ATOMIC_TEMP_IMAG:%.*]] = load float, ptr [[ATOMIC_TEMP_IMAGP]], align 4
+// CHECK-NEXT: [[F_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 0
+// CHECK-NEXT: [[F_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 1
+// CHECK-NEXT: store float [[ATOMIC_TEMP_REAL]], ptr [[F_REALP]], align 4
+// CHECK-NEXT: store float [[ATOMIC_TEMP_IMAG]], ptr [[F_IMAGP]], align 4
+// CHECK-NEXT: [[F_REALP1:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 0
+// CHECK-NEXT: [[F_REAL:%.*]] = load float, ptr [[F_REALP1]], align 4
+// CHECK-NEXT: [[F_IMAGP2:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 1
+// CHECK-NEXT: [[F_IMAG:%.*]] = load float, ptr [[F_IMAGP2]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[ATOMIC_TEMP3_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP3]], i32 0, i32 0
+// CHECK-NEXT: [[ATOMIC_TEMP3_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP3]], i32 0, i32 1
+// CHECK-NEXT: store float [[F_REAL]], ptr [[ATOMIC_TEMP3_REALP]], align 8
+// CHECK-NEXT: store float [[F_IMAG]], ptr [[ATOMIC_TEMP3_IMAGP]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[TMP2]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// CHECK-NEXT: ret void
+//
void testComplexFloat(_Atomic(_Complex float) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca [[CF:{ float, float }]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
-// CHECK-NEXT: store ptr
-
-// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 1
-// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]]
-// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
+
__c11_atomic_init(fp, 1.0f);
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 1
-// CHECK-NEXT: store float 2.000000e+00, ptr [[T0]]
-// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
_Atomic(_Complex float) x = 2.0f;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
-// CHECK-NEXT: store float [[R]], ptr [[T0]]
-// CHECK-NEXT: store float [[I]], ptr [[T1]]
_Complex float f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
-// CHECK-NEXT: [[DEST:%.*]] = load ptr, ptr [[FP]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 1
-// CHECK-NEXT: store float [[R]], ptr [[T0]]
-// CHECK-NEXT: store float [[I]], ptr [[T1]]
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[DEST]], ptr noundef [[TMP1]], i32 noundef 5)
*fp = f;
-// CHECK-NEXT: ret void
}
typedef struct { short x, y, z, w; } S;
_Atomic S testStructGlobal = (S){1, 2, 3, 4};
-// CHECK: define{{.*}} arm_aapcscc void @testStruct(ptr
+// CHECK-LABEL: define dso_local arm_aapcscc void @testStruct(
+// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X1:%.*]] = alloca [[STRUCT_S:%.*]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT_S]], align 2
+// CHECK-NEXT: [[AGG_TMP_ENSURED:%.*]] = alloca [[STRUCT_S]], align 8
+// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[X]], align 8
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[Y]], align 2
+// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[Z]], align 4
+// CHECK-NEXT: [[W:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, ptr [[W]], align 2
+// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[X2]], align 8
+// CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[Y3]], align 2
+// CHECK-NEXT: [[Z4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[Z4]], align 4
+// CHECK-NEXT: [[W5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, ptr [[W5]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP1]], ptr noundef [[F]], i32 noundef 5)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_TMP_ENSURED]], ptr align 2 [[F]], i32 8, i1 false)
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[TMP2]], ptr noundef [[AGG_TMP_ENSURED]], i32 noundef 5)
+// CHECK-NEXT: ret void
+//
void testStruct(_Atomic(S) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca [[S:.*]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
-// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
-// CHECK-NEXT: store ptr
-
-// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 3
-// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
+
__c11_atomic_init(fp, (S){1,2,3,4});
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 3
-// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
_Atomic(S) x = (S){1,2,3,4};
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[F]], i32 noundef 5)
S f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 2 [[F]], i32 8, i1 false)
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
*fp = f;
-// CHECK-NEXT: ret void
}
typedef struct { short x, y, z; } PS;
_Atomic PS testPromotedStructGlobal = (PS){1, 2, 3};
-// CHECK: define{{.*}} arm_aapcscc void @testPromotedStruct(ptr
+// CHECK-LABEL: define dso_local arm_aapcscc void @testPromotedStruct(
+// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X1:%.*]] = alloca { [[STRUCT_PS:%.*]], [2 x i8] }, align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT_PS]], align 2
+// CHECK-NEXT: [[ATOMIC_TO_NONATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: [[AGG_TMP_ENSURED:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_PS]], align 2
+// CHECK-NEXT: [[ATOMIC_TO_NONATOMIC_TEMP5:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[X]], align 8
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[Y]], align 2
+// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP1]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[Z]], align 4
+// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[X1]], i8 0, i32 8, i1 false)
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[X1]], i32 0, i32 0
+// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP2]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[X2]], align 8
+// CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP2]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[Y3]], align 2
+// CHECK-NEXT: [[Z4:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP2]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[Z4]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP3]], ptr noundef [[ATOMIC_TO_NONATOMIC_TEMP]], i32 noundef 5)
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[ATOMIC_TO_NONATOMIC_TEMP]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[F]], ptr align 8 [[TMP4]], i32 6, i1 false)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[AGG_TMP_ENSURED]], i8 0, i32 8, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[AGG_TMP_ENSURED]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP6]], ptr align 2 [[F]], i32 6, i1 false)
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[TMP5]], ptr noundef [[AGG_TMP_ENSURED]], i32 noundef 5)
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP7]], ptr noundef [[ATOMIC_TO_NONATOMIC_TEMP5]], i32 noundef 5)
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[ATOMIC_TO_NONATOMIC_TEMP5]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP]], ptr align 8 [[TMP8]], i32 6, i1 false)
+// CHECK-NEXT: [[X6:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP]], i32 0, i32 0
+// CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[X6]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
+// CHECK-NEXT: store i32 [[CONV]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void testPromotedStruct(_Atomic(PS) *fp) {
-// CHECK: [[FP:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca [[APS:.*]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
-// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
-// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
-// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
-// CHECK-NEXT: store ptr
-
-// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P]], i8 0, i64 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
+
__c11_atomic_init(fp, (PS){1,2,3});
-// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[X]], i8 0, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
_Atomic(PS) x = (PS){1,2,3};
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[F]], ptr align 8 [[T0]], i32 6, i1 false)
PS f = *fp;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[TMP1]], i8 0, i32 8, i1 false)
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[T1]], ptr align 2 [[F]], i32 6, i1 false)
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP1]], i32 noundef 5)
*fp = f;
-// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP3]], i32 noundef 5)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP3]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP2]], ptr align 8 [[T0]], i32 6, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS, ptr [[TMP2]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = load i16, ptr [[T0]], align 2
-// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
-// CHECK-NEXT: store i32 [[T2]], ptr [[A]], align 4
int a = ((PS)*fp).x;
-// CHECK-NEXT: ret void
}
+// CHECK-LABEL: define dso_local arm_aapcscc void @test_promoted_load(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_PS:%.*]]) align 2 [[AGG_RESULT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[TMP0]] seq_cst, align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT]], ptr align 8 [[ATOMIC_TEMP]], i32 6, i1 false)
+// CHECK-NEXT: ret void
+//
PS test_promoted_load(_Atomic(PS) *addr) {
- // CHECK-LABEL: @test_promoted_load(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr)
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[ATOMIC_RES:%.*]] = load atomic i64, ptr [[ADDR]] seq_cst, align 8
- // CHECK: store i64 [[ATOMIC_RES]], ptr [[ATOMIC_RES_ADDR:%.*]], align 8
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES_ADDR]], i32 6, i1 false)
return __c11_atomic_load(addr, 5);
}
+// CHECK-LABEL: define dso_local arm_aapcscc void @test_promoted_store(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]], ptr noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_PS:%.*]], align 2
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAL_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP1]], i32 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[TMP0]] seq_cst, align 8
+// CHECK-NEXT: ret void
+//
void test_promoted_store(_Atomic(PS) *addr, PS *val) {
- // CHECK-LABEL: @test_promoted_store(ptr noundef %addr, ptr noundef %val)
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
- // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
- // CHECK: [[ATOMIC:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
- // CHECK: store atomic i64 [[ATOMIC]], ptr [[ADDR]] seq_cst, align 8
__c11_atomic_store(addr, *val, 5);
}
+// CHECK-LABEL: define dso_local arm_aapcscc void @test_promoted_exchange(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_PS:%.*]]) align 2 [[AGG_RESULT:%.*]], ptr noundef [[ADDR:%.*]], ptr noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_PS]], align 2
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAL_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP1]], i32 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i64 [[TMP2]] seq_cst, align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP1]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT]], ptr align 8 [[ATOMIC_TEMP1]], i32 6, i1 false)
+// CHECK-NEXT: ret void
+//
PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
- // CHECK-LABEL: @test_promoted_exchange(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr, ptr noundef %val)
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
- // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
- // CHECK: [[ATOMIC:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
- // CHECK: [[ATOMIC_RES:%.*]] = atomicrmw xchg ptr [[ADDR]], i64 [[ATOMIC]] seq_cst, align 8
- // CHECK: store i64 [[ATOMIC_RES]], ptr [[ATOMIC_RES_PTR:%.*]], align 8
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES_PTR]], i32 6, i1 false)
return __c11_atomic_exchange(addr, *val, 5);
}
+// CHECK-LABEL: define dso_local arm_aapcscc zeroext i1 @test_promoted_cmpxchg(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]], ptr noundef [[DESIRED:%.*]], ptr noundef [[NEW:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[NEW_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_PS:%.*]], align 2
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: store ptr [[DESIRED]], ptr [[DESIRED_ADDR]], align 4
+// CHECK-NEXT: store ptr [[NEW]], ptr [[NEW_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DESIRED_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[NEW_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP2]], i32 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[TMP1]], i64 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP1]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK-NEXT: ret i1 [[LOADEDV]]
+//
_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
- // CHECK-LABEL: i1 @test_promoted_cmpxchg(ptr noundef %addr, ptr noundef %desired, ptr noundef %new) #0 {
- // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[DESIRED_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NEW_ARG:%.*]] = alloca ptr, align 4
- // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
- // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
- // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
- // CHECK: store ptr %desired, ptr [[DESIRED_ARG]], align 4
- // CHECK: store ptr %new, ptr [[NEW_ARG]], align 4
- // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
- // CHECK: [[DESIRED:%.*]] = load ptr, ptr [[DESIRED_ARG]], align 4
- // CHECK: [[NEW:%.*]] = load ptr, ptr [[NEW_ARG]], align 4
- // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[NEW]], i32 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_DESIRED]], ptr align 2 [[DESIRED]], i64 6, i1 false)
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_NEW]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
- // CHECK: [[VAL1:%.*]] = load i64, ptr [[ATOMIC_DESIRED]], align 8
- // CHECK: [[VAL2:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 8
- // CHECK: [[RES_PAIR:%.*]] = cmpxchg ptr [[ADDR]], i64 [[VAL1]], i64 [[VAL2]] seq_cst seq_cst, align 8
- // CHECK: [[RES:%.*]] = extractvalue { i64, i1 } [[RES_PAIR]], 1
return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
}
struct Empty {};
+// CHECK-LABEL: define dso_local arm_aapcscc void @test_empty_struct_load(
+// CHECK-SAME: ptr noundef [[EMPTY:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
+// CHECK-NEXT: [[EMPTY_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_EMPTY]], [1 x i8] }, align 1
+// CHECK-NEXT: store ptr [[EMPTY]], ptr [[EMPTY_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[EMPTY_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[TMP0]] seq_cst, align 1
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 1 [[ATOMIC_TEMP]], i32 0, i1 false)
+// CHECK-NEXT: ret void
+//
struct Empty test_empty_struct_load(_Atomic(struct Empty)* empty) {
- // CHECK-LABEL: @test_empty_struct_load(
- // CHECK: load atomic i8, ptr {{.*}}, align 1
return __c11_atomic_load(empty, 5);
}
+// CHECK-LABEL: define dso_local arm_aapcscc void @test_empty_struct_store(
+// CHECK-SAME: ptr noundef [[EMPTY:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VALUE:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
+// CHECK-NEXT: [[EMPTY_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_EMPTY]], [1 x i8] }, align 1
+// CHECK-NEXT: store ptr [[EMPTY]], ptr [[EMPTY_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[EMPTY_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[VALUE]], i32 0, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[ATOMIC_TEMP]], ptr align 1 [[DOTATOMICTMP]], i64 0, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[TMP0]] seq_cst, align 1
+// CHECK-NEXT: ret void
+//
void test_empty_struct_store(_Atomic(struct Empty)* empty, struct Empty value) {
- // CHECK-LABEL: @test_empty_struct_store(
- // CHECK: store atomic i8 {{.*}}, ptr {{.*}}, align 1
__c11_atomic_store(empty, value, 5);
}
diff --git a/clang/test/CodeGen/sanitize-atomic-int-overflow.c b/clang/test/CodeGen/sanitize-atomic-int-overflow.c
index da8152ad7aad1f..6dacd3c5a81e63 100644
--- a/clang/test/CodeGen/sanitize-atomic-int-overflow.c
+++ b/clang/test/CodeGen/sanitize-atomic-int-overflow.c
@@ -1,33 +1,18 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 -fsanitize=unsigned-integer-overflow %s -emit-llvm -o - | FileCheck %s
_Atomic(unsigned) atomic;
-// CHECK-LABEL: define{{.*}} void @cmpd_assign
void cmpd_assign(void) {
- // CHECK: br label %[[LOOP_START:.*]]
- // CHECK: [[LOOP_START]]:
- // CHECK-NEXT: phi i32 {{.*}}, [ {{.*}}, %[[INCOMING_BLOCK:.*]] ]
- // CHECK: [[INCOMING_BLOCK]]:
- // CHECK-NEXT: cmpxchg
- // CHECK-NEXT: extractvalue
- // CHECK-NEXT: extractvalue
- // CHECK-NEXT: br i1 %8, label %{{.*}}, label %[[LOOP_START]]
atomic += 1;
}
-// CHECK-LABEL: define{{.*}} void @inc
void inc(void) {
- // CHECK: br label %[[LOOP_START:.*]]
- // CHECK: [[LOOP_START]]:
- // CHECK-NEXT: phi i32 {{.*}}, [ {{.*}}, %[[INCOMING_BLOCK:.*]] ]
- // CHECK: [[INCOMING_BLOCK]]:
- // CHECK-NEXT: cmpxchg
- // CHECK-NEXT: extractvalue
- // CHECK-NEXT: extractvalue
- // CHECK-NEXT: br i1 %8, label %{{.*}}, label %[[LOOP_START]]
atomic++;
}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu
index fbc042caa809f9..8a0b984971c455 100644
--- a/clang/test/CodeGenCUDA/atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/atomic-ops.cu
@@ -1,19 +1,180 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
#include "Inputs/cuda.h"
-// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-LABEL: define dso_local noundef i32 @_Z24atomic32_op_singlethreadPiii(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("singlethread-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("singlethread-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load atomic i32, ptr [[TMP46]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 4
+// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP25_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 4
+// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP26_ASCAST]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP51]], ptr [[TMP49]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i32 [[COND]]
+//
__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -30,28 +191,226 @@ __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK-LABEL: define dso_local noundef i32 @_Z25atomicu32_op_singlethreadPjjj(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("singlethread-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: ret i32 [[TMP10]]
+//
__device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return val;
}
-// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-LABEL: define dso_local noundef i32 @_Z21atomic32_op_wavefrontPiii(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("wavefront-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("wavefront-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load atomic i32, ptr [[TMP46]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 4
+// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP25_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 4
+// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP26_ASCAST]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP51]], ptr [[TMP49]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i32 [[COND]]
+//
__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -68,27 +427,219 @@ __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK-LABEL: define dso_local noundef i32 @_Z22atomicu32_op_wavefrontPjjj(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: ret i32 [[TMP10]]
+//
__device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
return val;
}
-// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-LABEL: define dso_local noundef i32 @_Z21atomic32_op_workgroupPiii(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("workgroup-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 4
+// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTATOMICTMP25_ASCAST]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP48]], ptr [[TMP46]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i32 [[COND]]
+//
__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -104,27 +655,219 @@ __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK-LABEL: define dso_local noundef i32 @_Z22atomicu32_op_workgroupPjjj(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: ret i32 [[TMP10]]
+//
__device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
return val;
}
-// CHECK-LABEL: @_Z17atomic32_op_agentPiii
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4
+// CHECK-LABEL: define dso_local noundef i32 @_Z17atomic32_op_agentPiii(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("agent-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("agent-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 4
+// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTATOMICTMP25_ASCAST]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP48]], ptr [[TMP46]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i32 [[COND]]
+//
__device__ int atomic32_op_agent(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -140,28 +883,226 @@ __device__ int atomic32_op_agent(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z18atomicu32_op_agentPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK-LABEL: define dso_local noundef i32 @_Z18atomicu32_op_agentPjjj(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: ret i32 [[TMP10]]
+//
// Exercises the unsigned fetch-min/fetch-max HIP atomic builtins at agent
// scope with relaxed ordering; the CHECK block above expects these to lower
// to `atomicrmw umin` / `atomicrmw umax` with syncscope("agent-one-as").
// NOTE(review): `desired` is accepted but unused here — presumably kept so
// this signature mirrors atomic32_op_agent; confirm that is intentional.
__device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) {
  // Each call stores the previous value of *ptr back into val.
  val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return val;
}
-// CHECK-LABEL: @_Z18atomic32_op_systemPiii
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: load i32, ptr %{{.*}}, align 4
-// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 4
+// CHECK-LABEL: define dso_local noundef i32 @_Z18atomic32_op_systemPiii(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load atomic i32, ptr [[TMP46]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 4
+// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP25_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 4
+// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP26_ASCAST]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP51]], ptr [[TMP49]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i32 [[COND]]
+//
__device__ int atomic32_op_system(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -178,27 +1119,222 @@ __device__ int atomic32_op_system(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z19atomicu32_op_systemPjjj
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK-LABEL: define dso_local noundef i32 @_Z19atomicu32_op_systemPjjj(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: ret i32 [[TMP10]]
+//
__device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
return val;
}
-// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z24atomic64_op_singlethreadPxS_xx(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("singlethread-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("singlethread-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTATOMICTMP25_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP48]], ptr [[TMP46]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i64 [[COND]]
+//
__device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -214,11 +1350,64 @@ __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, l
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: load atomic i64, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z25atomicu64_op_singlethreadPyS_yy(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load atomic i64, ptr [[TMP10]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP11]], ptr [[ATOMIC_TEMP3_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ATOMIC_TEMP3_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[DOTATOMICTMP4_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTATOMICTMP4_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP15]], ptr [[TMP13]] syncscope("singlethread-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: ret i64 [[TMP16]]
+//
__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -227,19 +1416,182 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr,
return val;
}
-// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z21atomic64_op_wavefrontPxS_xx(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("wavefront-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("wavefront-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load atomic i64, ptr [[TMP46]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[ATOMIC_TEMP25_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 8
+// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTATOMICTMP26_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP51]], ptr [[TMP49]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP54:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i64 [[COND]]
+//
__device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -256,11 +1608,64 @@ __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z22atomicu64_op_wavefrontPyS_yy(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load atomic i64, ptr [[TMP10]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP11]], ptr [[ATOMIC_TEMP3_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ATOMIC_TEMP3_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[DOTATOMICTMP4_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTATOMICTMP4_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP15]], ptr [[TMP13]] syncscope("wavefront-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: ret i64 [[TMP16]]
+//
__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -269,18 +1674,175 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un
return val;
}
-// CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z21atomic64_op_workgroupPxS_xx(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("workgroup-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTATOMICTMP25_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP48]], ptr [[TMP46]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i64 [[COND]]
+//
__device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -296,10 +1858,57 @@ __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z22atomicu64_op_workgroupPyS_yy(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP11]], ptr [[DOTATOMICTMP3_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTATOMICTMP3_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP12]], ptr [[TMP10]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: ret i64 [[TMP13]]
+//
__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -307,18 +1916,175 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un
return val;
}
-// CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z17atomic64_op_agentPxS_xx(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("agent-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("agent-one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTATOMICTMP25_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP48]], ptr [[TMP46]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i64 [[COND]]
+//
__device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -334,10 +2100,57 @@ __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long lon
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z18atomicu64_op_agentPyS_yy(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP11]], ptr [[DOTATOMICTMP3_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTATOMICTMP3_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP12]], ptr [[TMP10]] syncscope("agent-one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: ret i64 [[TMP13]]
+//
__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -345,19 +2158,182 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign
return val;
}
-// CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx
-// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: load i64, ptr %{{.*}}, align 8
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z18atomic64_op_systemPxS_xx(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("one-as") monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
+// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
+// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
+// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
+// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load atomic i64, ptr [[TMP46]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[ATOMIC_TEMP25_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 8
+// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTATOMICTMP26_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP51]], ptr [[TMP49]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
+// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
+// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK: [[COND_TRUE]]:
+// CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END:.*]]
+// CHECK: [[COND_FALSE]]:
+// CHECK-NEXT: [[TMP54:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: br label %[[COND_END]]
+// CHECK: [[COND_END]]:
+// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
+// CHECK-NEXT: ret i64 [[COND]]
+//
__device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -374,11 +2350,64 @@ __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long lo
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy
-// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: load i64, ptr %{{.*}}, align 8
-// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8
+// CHECK-LABEL: define dso_local noundef i64 @_Z19atomicu64_op_systemPyS_yy(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
+// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
+// CHECK-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load atomic i64, ptr [[TMP10]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP11]], ptr [[ATOMIC_TEMP3_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ATOMIC_TEMP3_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP12]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP14]], ptr [[DOTATOMICTMP4_ASCAST]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTATOMICTMP4_ASCAST]], align 8
+// CHECK-NEXT: store atomic i64 [[TMP15]], ptr [[TMP13]] syncscope("one-as") monotonic, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: ret i64 [[TMP16]]
+//
__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index 5e2de38ac3d3e3..a0a2ae4e63a879 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \
// RUN: | FileCheck %s
@@ -35,309 +36,693 @@ typedef enum memory_scope {
atomic_int j;
+// CHECK-LABEL: define dso_local void @fi1(
+// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP3]] syncscope("agent") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[ATOMIC_TEMP1]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP1]], align 4
+// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP9]] syncscope("wavefront") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(5) [[ATOMIC_TEMP3]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP3]], align 4
+// CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: ret void
+//
void fi1(atomic_int *i) {
- // CHECK-LABEL: @fi1
- // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
- // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
- // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}
+// CHECK-LABEL: define dso_local void @fi2(
+// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: ret void
+//
void fi2(atomic_int *i) {
- // CHECK-LABEL: @fi2
- // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local void @test_addr(
+// CHECK-SAME: ptr addrspace(1) noundef [[IG:%.*]], ptr addrspace(5) noundef [[IP:%.*]], ptr addrspace(3) noundef [[IL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IG_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[IP_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CHECK-NEXT: [[IL_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(1) [[IG]], ptr addrspace(5) [[IG_ADDR]], align 8
+// CHECK-NEXT: store ptr addrspace(5) [[IP]], ptr addrspace(5) [[IP_ADDR]], align 4
+// CHECK-NEXT: store ptr addrspace(3) [[IL]], ptr addrspace(5) [[IL_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IG_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[TMP0]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[IP_ADDR]], align 4
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP3]], ptr addrspace(5) [[TMP2]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[IL_ADDR]], align 4
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP2]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP2]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP5]], ptr addrspace(3) [[TMP4]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: ret void
+//
void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
- // CHECK-LABEL: @test_addr
- // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(1) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(5) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(3) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local void @fi3(
+// CHECK-SAME: ptr noundef [[I:%.*]], ptr noundef [[UI:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[UI_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store ptr [[UI]], ptr addrspace(5) [[UI_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw min ptr [[TMP4]], i32 [[TMP5]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP3]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP3]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = atomicrmw max ptr [[TMP8]], i32 [[TMP9]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(5) [[ATOMIC_TEMP4]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP4]], align 4
+// CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr addrspace(5) [[UI_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP5]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP5]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw umin ptr [[TMP12]], i32 [[TMP13]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(5) [[ATOMIC_TEMP6]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP6]], align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr addrspace(5) [[UI_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP7]], align 4
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP7]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = atomicrmw umax ptr [[TMP16]], i32 [[TMP17]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(5) [[ATOMIC_TEMP8]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP8]], align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: ret void
+//
void fi3(atomic_int *i, atomic_uint *ui) {
- // CHECK-LABEL: @fi3
- // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
- // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local zeroext i1 @fi4(
+// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") acquire acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr addrspace(5) [[CMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK-NEXT: ret i1 [[LOADEDV]]
+//
bool fi4(atomic_int *i) {
- // CHECK-LABEL: @fi4(
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4
- // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
- // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
- // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
- // CHECK: store i32 [[OLD]]
int cmp = 0;
return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local void @fi5(
+// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// CHECK-NEXT: switch i32 [[TMP1]], label %[[OPENCL_ALLSVMDEVICES:.*]] [
+// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]]
+// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]]
+// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[OPENCL_WORKGROUP]]:
+// CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]]
+// CHECK: [[OPENCL_DEVICE]]:
+// CHECK-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// CHECK: [[OPENCL_ALLSVMDEVICES]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// CHECK: [[OPENCL_SUBGROUP]]:
+// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// CHECK: [[ATOMIC_SCOPE_CONTINUE]]:
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: ret void
+//
void fi5(atomic_int *i, int scope) {
- // CHECK-LABEL: @fi5
- // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
- // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
- // CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
- // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
- // CHECK-NEXT: ]
- // CHECK: [[opencl_workgroup]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4
- // CHECK: br label %[[continue:.*]]
- // CHECK: [[opencl_device]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4
- // CHECK: br label %[[continue]]
- // CHECK: [[opencl_allsvmdevices]]:
- // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4
- // CHECK: br label %[[continue]]
- // CHECK: [[opencl_subgroup]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4
- // CHECK: br label %[[continue]]
- // CHECK: [[continue]]:
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}
+// CHECK-LABEL: define dso_local void @fi6(
+// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[ORDER:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ORDER_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: store i32 [[ORDER]], ptr addrspace(5) [[ORDER_ADDR]], align 4
+// CHECK-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ORDER_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+// CHECK-NEXT: i32 5, label %[[SEQCST:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[MONOTONIC]]:
+// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES:.*]] [
+// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]]
+// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]]
+// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[ACQUIRE]]:
+// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES3:.*]] [
+// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP1:.*]]
+// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE2:.*]]
+// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP4:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[SEQCST]]:
+// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES8:.*]] [
+// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP6:.*]]
+// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE7:.*]]
+// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP9:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[ATOMIC_CONTINUE:.*]]:
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[X]], align 4
+// CHECK-NEXT: ret void
+// CHECK: [[OPENCL_WORKGROUP]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]]
+// CHECK: [[OPENCL_DEVICE]]:
+// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// CHECK: [[OPENCL_ALLSVMDEVICES]]:
+// CHECK-NEXT: [[TMP6:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// CHECK: [[OPENCL_SUBGROUP]]:
+// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// CHECK: [[ATOMIC_SCOPE_CONTINUE]]:
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+// CHECK: [[OPENCL_WORKGROUP1]]:
+// CHECK-NEXT: [[TMP8:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") acquire, align 4
+// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5:.*]]
+// CHECK: [[OPENCL_DEVICE2]]:
+// CHECK-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") acquire, align 4
+// CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]]
+// CHECK: [[OPENCL_ALLSVMDEVICES3]]:
+// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") acquire, align 4
+// CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]]
+// CHECK: [[OPENCL_SUBGROUP4]]:
+// CHECK-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") acquire, align 4
+// CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]]
+// CHECK: [[ATOMIC_SCOPE_CONTINUE5]]:
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+// CHECK: [[OPENCL_WORKGROUP6]]:
+// CHECK-NEXT: [[TMP12:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP12]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10:.*]]
+// CHECK: [[OPENCL_DEVICE7]]:
+// CHECK-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]]
+// CHECK: [[OPENCL_ALLSVMDEVICES8]]:
+// CHECK-NEXT: [[TMP14:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]]
+// CHECK: [[OPENCL_SUBGROUP9]]:
+// CHECK-NEXT: [[TMP15:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]]
+// CHECK: [[ATOMIC_SCOPE_CONTINUE10]]:
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+//
void fi6(atomic_int *i, int order, int scope) {
- // CHECK-LABEL: @fi6
- // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
- // CHECK-NEXT: i32 1, label %[[acquire:.*]]
- // CHECK-NEXT: i32 2, label %[[acquire:.*]]
- // CHECK-NEXT: i32 5, label %[[seqcst:.*]]
- // CHECK-NEXT: ]
- // CHECK: [[monotonic]]:
- // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
- // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
- // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
- // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
- // CHECK-NEXT: ]
- // CHECK: [[acquire]]:
- // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
- // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
- // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
- // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
- // CHECK-NEXT: ]
- // CHECK: [[seqcst]]:
- // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
- // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
- // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
- // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
- // CHECK-NEXT: ]
- // CHECK: [[MON_WG]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
- // CHECK: [[MON_DEV]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4
- // CHECK: [[MON_ALL]]:
- // CHECK: load atomic i32, ptr %{{.*}} monotonic, align 4
- // CHECK: [[MON_SUB]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
- // CHECK: [[ACQ_WG]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") acquire, align 4
- // CHECK: [[ACQ_DEV]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") acquire, align 4
- // CHECK: [[ACQ_ALL]]:
- // CHECK: load atomic i32, ptr %{{.*}} acquire, align 4
- // CHECK: [[ACQ_SUB]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") acquire, align 4
- // CHECK: [[SEQ_WG]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4
- // CHECK: [[SEQ_DEV]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4
- // CHECK: [[SEQ_ALL]]:
- // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4
- // CHECK: [[SEQ_SUB]]:
- // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4
int x = __opencl_atomic_load(i, order, scope);
}
+// CHECK-LABEL: define dso_local float @ff1(
+// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: ret float [[TMP2]]
+//
float ff1(global atomic_float *d) {
- // CHECK-LABEL: @ff1
- // CHECK: load atomic i32, ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4
return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local void @ff2(
+// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[D]], ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: store float 1.000000e+00, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup-one-as") release, align 4
+// CHECK-NEXT: ret void
+//
void ff2(atomic_float *d) {
- // CHECK-LABEL: @ff2
- // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4
__opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local float @ff3(
+// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[D]], ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: store float 2.000000e+00, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: ret float [[TMP3]]
+//
float ff3(atomic_float *d) {
- // CHECK-LABEL: @ff3
- // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local float @ff4(
+// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: store float [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[A_ADDR]], align 4
+// CHECK-NEXT: store float [[TMP1]], ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], float [[TMP2]] syncscope("workgroup-one-as") monotonic, align 4
+// CHECK-NEXT: store float [[TMP3]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: ret float [[TMP4]]
+//
float ff4(global atomic_float *d, float a) {
- // CHECK-LABEL: @ff4
- // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
+// CHECK-LABEL: define dso_local float @ff5(
+// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: store double [[A]], ptr addrspace(5) [[A_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[D_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(5) [[A_ADDR]], align 8
+// CHECK-NEXT: store double [[TMP1]], ptr addrspace(5) [[DOTATOMICTMP]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[DOTATOMICTMP]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], double [[TMP2]] syncscope("workgroup-one-as") monotonic, align 8
+// CHECK-NEXT: store double [[TMP3]], ptr addrspace(5) [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr addrspace(5) [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP4]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
float ff5(global atomic_double *d, double a) {
- // CHECK-LABEL: @ff5
- // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
-// CHECK-LABEL: @atomic_init_foo
+// CHECK-LABEL: define dso_local void @atomic_init_foo(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store i32 42, ptr addrspace(1) @j, align 4
+// CHECK-NEXT: ret void
+//
void atomic_init_foo()
{
- // CHECK-NOT: atomic
- // CHECK: store
__opencl_atomic_init(&j, 42);
- // CHECK-NOT: atomic
- // CHECK: }
}
-// CHECK-LABEL: @failureOrder
+// CHECK-LABEL: define dso_local void @failureOrder(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: store ptr [[PTR]], ptr addrspace(5) [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr addrspace(5) [[PTR2_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[PTR2_ADDR]], align 8
+// CHECK-NEXT: store i32 43, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") acquire monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr addrspace(5) [[PTR2_ADDR]], align 8
+// CHECK-NEXT: store i32 43, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
+// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("workgroup") seq_cst acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL2]], align 1
+// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-NEXT: ret void
+//
void failureOrder(atomic_int *ptr, int *ptr2) {
- // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
- // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4
__opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}
-// CHECK-LABEL: @generalFailureOrder
+// CHECK-LABEL: define dso_local void @generalFailureOrder(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i32 noundef [[SUCCESS:%.*]], i32 noundef [[FAIL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[SUCCESS_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[FAIL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: store ptr [[PTR]], ptr addrspace(5) [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[PTR2]], ptr addrspace(5) [[PTR2_ADDR]], align 8
+// CHECK-NEXT: store i32 [[SUCCESS]], ptr addrspace(5) [[SUCCESS_ADDR]], align 4
+// CHECK-NEXT: store i32 [[FAIL]], ptr addrspace(5) [[FAIL_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SUCCESS_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[PTR2_ADDR]], align 8
+// CHECK-NEXT: store i32 42, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[FAIL_ADDR]], align 4
+// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+// CHECK-NEXT: i32 3, label %[[RELEASE:.*]]
+// CHECK-NEXT: i32 4, label %[[ACQREL:.*]]
+// CHECK-NEXT: i32 5, label %[[SEQCST:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[MONOTONIC]]:
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL]]
+// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[ACQUIRE]]:
+// CHECK-NEXT: [[CMPXCHG_EXPECTED8:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED9:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL10:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL11:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL11]]
+// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL12:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[RELEASE]]:
+// CHECK-NEXT: [[CMPXCHG_EXPECTED24:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED25:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL26:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL27:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL27]]
+// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL28:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[ACQREL]]:
+// CHECK-NEXT: [[CMPXCHG_EXPECTED40:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED41:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL42:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL43:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL43]]
+// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL44:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[SEQCST]]:
+// CHECK-NEXT: [[CMPXCHG_EXPECTED56:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_DESIRED57:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL58:.*]] [
+// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL59:.*]]
+// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL59]]
+// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL60:.*]]
+// CHECK-NEXT: ]
+// CHECK: [[ATOMIC_CONTINUE:.*]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP4]] to i1
+// CHECK-NEXT: ret void
+// CHECK: [[MONOTONIC_FAIL]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1:.*]]
+// CHECK: [[ACQUIRE_FAIL]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR2:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV3:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS4:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV3]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]]
+// CHECK: [[SEQCST_FAIL]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]]
+// CHECK: [[ATOMIC_CONTINUE1]]:
+// CHECK-NEXT: [[CMPXCGH_SUCCESS:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS]], %[[MONOTONIC_FAIL]] ], [ [[CMPXCHG_SUCCESS4]], %[[ACQUIRE_FAIL]] ], [ [[CMPXCHG_SUCCESS7]], %[[SEQCST_FAIL]] ]
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+// CHECK: [[MONOTONIC_FAIL10]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR14:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV15:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS16:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV15]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13:.*]]
+// CHECK: [[ACQUIRE_FAIL11]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR17:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV18:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS19:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV18]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13]]
+// CHECK: [[SEQCST_FAIL12]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR20:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV21:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS22:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV21]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13]]
+// CHECK: [[ATOMIC_CONTINUE13]]:
+// CHECK-NEXT: [[CMPXCGH_SUCCESS23:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS16]], %[[MONOTONIC_FAIL10]] ], [ [[CMPXCHG_SUCCESS19]], %[[ACQUIRE_FAIL11]] ], [ [[CMPXCHG_SUCCESS22]], %[[SEQCST_FAIL12]] ]
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+// CHECK: [[MONOTONIC_FAIL26]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR30:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV31:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS32:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV31]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29:.*]]
+// CHECK: [[ACQUIRE_FAIL27]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR33:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV34:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS35:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV34]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29]]
+// CHECK: [[SEQCST_FAIL28]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR36:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV37:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS38:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV37]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29]]
+// CHECK: [[ATOMIC_CONTINUE29]]:
+// CHECK-NEXT: [[CMPXCGH_SUCCESS39:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS32]], %[[MONOTONIC_FAIL26]] ], [ [[CMPXCHG_SUCCESS35]], %[[ACQUIRE_FAIL27]] ], [ [[CMPXCHG_SUCCESS38]], %[[SEQCST_FAIL28]] ]
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+// CHECK: [[MONOTONIC_FAIL42]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR46:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV47:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS48:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV47]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45:.*]]
+// CHECK: [[ACQUIRE_FAIL43]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR49:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV50:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS51:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV50]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45]]
+// CHECK: [[SEQCST_FAIL44]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR52:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV53:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS54:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV53]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45]]
+// CHECK: [[ATOMIC_CONTINUE45]]:
+// CHECK-NEXT: [[CMPXCGH_SUCCESS55:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS48]], %[[MONOTONIC_FAIL42]] ], [ [[CMPXCHG_SUCCESS51]], %[[ACQUIRE_FAIL43]] ], [ [[CMPXCHG_SUCCESS54]], %[[SEQCST_FAIL44]] ]
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+// CHECK: [[MONOTONIC_FAIL58]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR62:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst monotonic, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV63:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS64:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV63]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61:.*]]
+// CHECK: [[ACQUIRE_FAIL59]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR65:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst acquire, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV66:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS67:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV66]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61]]
+// CHECK: [[SEQCST_FAIL60]]:
+// CHECK-NEXT: [[CMPXCHG_PAIR68:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst seq_cst, align 4
+// CHECK-NEXT: [[CMPXCHG_PREV69:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 0
+// CHECK-NEXT: [[CMPXCHG_SUCCESS70:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 1
+// CHECK-NEXT: store i32 [[CMPXCHG_PREV69]], ptr [[TMP2]], align 4
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61]]
+// CHECK: [[ATOMIC_CONTINUE61]]:
+// CHECK-NEXT: [[CMPXCGH_SUCCESS71:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS64]], %[[MONOTONIC_FAIL58]] ], [ [[CMPXCHG_SUCCESS67]], %[[ACQUIRE_FAIL59]] ], [ [[CMPXCHG_SUCCESS70]], %[[SEQCST_FAIL60]] ]
+// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
+//
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
-// CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
- // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
-
- // CHECK: [[MONOTONIC]]
- // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[ACQUIRE]]
- // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[RELEASE]]
- // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[ACQREL]]
- // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[SEQCST]]
- // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
- // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
- // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
- // CHECK-NEXT: ]
-
- // CHECK: [[MONOTONIC_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4
- // CHECK: br
-
- // CHECK: [[MONOTONIC_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} monotonic acquire, align 4
- // CHECK: br
-
- // CHECK: [[MONOTONIC_SEQCST]]
- // CHECK: cmpxchg {{.*}} monotonic seq_cst, align 4
- // CHECK: br
-
- // CHECK: [[ACQUIRE_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} acquire monotonic, align 4
- // CHECK: br
-
- // CHECK: [[ACQUIRE_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} acquire acquire, align 4
- // CHECK: br
-
- // CHECK: [[ACQUIRE_SEQCST]]
- // CHECK: cmpxchg {{.*}} acquire seq_cst, align 4
- // CHECK: br
-
- // CHECK: [[RELEASE_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} release monotonic, align 4
- // CHECK: br
-
- // CHECK: [[RELEASE_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} release acquire, align 4
- // CHECK: br
-
- // CHECK: [[RELEASE_SEQCST]]
- // CHECK: cmpxchg {{.*}} release seq_cst, align 4
- // CHECK: br
-
- // CHECK: [[ACQREL_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4
- // CHECK: br
-
- // CHECK: [[ACQREL_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4
- // CHECK: br
-
- // CHECK: [[ACQREL_SEQCST]]
- // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align 4
- // CHECK: br
-
- // CHECK: [[SEQCST_MONOTONIC]]
- // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4
- // CHECK: br
-
- // CHECK: [[SEQCST_ACQUIRE]]
- // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4
- // CHECK: br
-
- // CHECK: [[SEQCST_SEQCST]]
- // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4
- // CHECK: br
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
}
+// CHECK-LABEL: define dso_local i32 @test_volatile(
+// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
int test_volatile(volatile atomic_int *i) {
- // CHECK-LABEL: @test_volatile
- // CHECK: %[[i_addr:.*]] = alloca ptr
- // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
- // CHECK-NEXT: store ptr %i, ptr addrspace(5) %[[i_addr]]
- // CHECK-NEXT: %[[addr:.*]] = load ptr, ptr addrspace(5) %[[i_addr]]
- // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] syncscope("workgroup") seq_cst, align 4
- // CHECK-NEXT: store i32 %[[res]], ptr addrspace(5) %[[atomicdst]]
- // CHECK-NEXT: %[[retval:.*]] = load i32, ptr addrspace(5) %[[atomicdst]]
- // CHECK-NEXT: ret i32 %[[retval]]
return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 754f09c19fb357..e394113403b8a2 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -441,6 +441,39 @@ TLI_DEFINE_ENUM_INTERNAL(atomic_store)
TLI_DEFINE_STRING_INTERNAL("__atomic_store")
TLI_DEFINE_SIG_INTERNAL(Void, SizeT, Ptr, Ptr, Int)
+
+/// bool __atomic_compare_exchange(size_t size, void *obj, void *expected, void *desired, int success, int failure)
+TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange)
+TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange")
+TLI_DEFINE_SIG_INTERNAL(Bool, SizeT, Ptr, Ptr, Ptr, Int, Int)
+
+
+/// bool __atomic_compare_exchange_1(void *obj, void *expected, uint8_t desired, int success, int failure)
+TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_1)
+TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_1")
+TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int8, Int, Int)
+
+/// bool __atomic_compare_exchange_2(void *obj, void *expected, uint16_t desired, int success, int failure)
+TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_2)
+TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_2")
+TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int16, Int, Int)
+
+/// bool __atomic_compare_exchange_4(void *obj, void *expected, uint32_t desired, int success, int failure)
+TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_4)
+TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_4")
+TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int32, Int, Int)
+
+/// bool __atomic_compare_exchange_8(void *obj, void *expected, uint64_t desired, int success, int failure)
+TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_8)
+TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_8")
+TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int64, Int, Int)
+
+/// bool __atomic_compare_exchange_16(void *obj, void *expected, uint128_t desired, int success, int failure)
+TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_16)
+TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_16")
+TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int128, Int, Int)
+
+
/// double __cosh_finite(double x);
TLI_DEFINE_ENUM_INTERNAL(cosh_finite)
TLI_DEFINE_STRING_INTERNAL("__cosh_finite")
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 89aaf6d1ad83f8..195f4e215ae94a 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
// NOTE: NO INCLUDE GUARD DESIRED!
+// FIXME: Redundant with Analysis/TargetLibraryInfo.def
// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index 42d510c17bce39..74d4f33f45fdbb 100644
--- a/llvm/include/llvm/MC/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -456,12 +456,7 @@ class Target {
StringRef TT, StringRef CPU, StringRef Features,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM = std::nullopt,
- CodeGenOptLevel OL = CodeGenOptLevel::Default, bool JIT = false) const {
- if (!TargetMachineCtorFn)
- return nullptr;
- return TargetMachineCtorFn(*this, Triple(TT), CPU, Features, Options, RM,
- CM, OL, JIT);
- }
+ CodeGenOptLevel OL = CodeGenOptLevel::Default, bool JIT = false) const;
/// createMCAsmBackend - Create a target specific assembly parser.
MCAsmBackend *createMCAsmBackend(const MCSubtargetInfo &STI,
diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h
index e08c1b262a92b5..9c1d3801020a3c 100644
--- a/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/llvm/include/llvm/Support/AtomicOrdering.h
@@ -158,6 +158,28 @@ inline AtomicOrderingCABI toCABI(AtomicOrdering AO) {
return lookup[static_cast<size_t>(AO)];
}
+inline AtomicOrdering fromCABI(AtomicOrderingCABI AO) {
+  // Acquire is the closest ordering that is still stronger than consume.
+ static const AtomicOrdering lookup[8] = {
+ /* relaxed */ AtomicOrdering::Monotonic,
+ /* consume */ AtomicOrdering::Acquire,
+ /* acquire */ AtomicOrdering::Acquire,
+ /* release */ AtomicOrdering::Release,
+ /* acq_rel */ AtomicOrdering::AcquireRelease,
+      /* seq_cst */ AtomicOrdering::SequentiallyConsistent,
+ };
+ return lookup[static_cast<size_t>(AO)];
+}
+
+inline AtomicOrdering fromCABI(int64_t AO) {
+ if (!isValidAtomicOrderingCABI(AO)) {
+ // This fallback is what CGAtomic does
+ return AtomicOrdering::Monotonic;
+ }
+ assert(isValidAtomicOrderingCABI(AO));
+ return fromCABI(static_cast<AtomicOrderingCABI>(AO));
+}
+
} // end namespace llvm
#endif // LLVM_SUPPORT_ATOMICORDERING_H
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index b8e56c755fbda8..9e288b9c84c4a2 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -114,6 +114,8 @@ class TargetMachine {
// PGO related tunables.
std::optional<PGOOptions> PGOOption;
+ bool IsValid = true;
+
public:
mutable TargetOptions Options;
@@ -121,6 +123,8 @@ class TargetMachine {
void operator=(const TargetMachine &) = delete;
virtual ~TargetMachine();
+ bool isValid() const { return IsValid; }
+
const Target &getTarget() const { return TheTarget; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
new file mode 100644
index 00000000000000..7c541a68859bc0
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -0,0 +1,129 @@
+//===- BuildBuiltins.h - Utility builder for builtins ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions for lowering compiler builtins.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H
+#define LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include <cstdint>
+#include <variant>
+
+namespace llvm {
+class Value;
+class TargetLibraryInfo;
+class DataLayout;
+class IRBuilderBase;
+class Type;
+class TargetLowering;
+
+namespace SyncScope {
+typedef uint8_t ID;
+}
+
+/// Emit a call to the __atomic_compare_exchange builtin. This may either be
+/// lowered to the cmpxchg LLVM instruction, or to one of the following libcall
+/// functions: __atomic_compare_exchange_1, __atomic_compare_exchange_2,
+/// __atomic_compare_exchange_4, __atomic_compare_exchange_8,
+/// __atomic_compare_exchange_16, __atomic_compare_exchange.
+///
+/// Also see:
+/// https://llvm.org/docs/Atomics.html
+/// https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+/// https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param Ptr The memory location accessed atomically.
+/// @param ExpectedPtr Pointer to the data expected at \p Ptr. The exchange will
+/// only happen if the value at \p Ptr is equal to this. Data
+/// at \p ExpectedPtr may or may not be overwritten, so do
+/// not use after this call.
+/// @param DesiredPtr Pointer to the data that the data at \p Ptr is replaced
+/// with.
+/// @param IsWeak If true, the exchange may not happen even if the data at
+/// \p Ptr equals to \p ExpectedPtr.
+/// @param IsVolatile Whether to mark the access as volatile.
+/// @param SuccessMemorder If the exchange succeeds, memory is affected
+/// according to the memory model.
+/// @param FailureMemorder If the exchange fails, memory is affected according
+/// to the memory model. It is considered an atomic "read"
+/// for the purpose of identifying release sequences. Must
+/// not be release, acquire-release, and at most as strong as
+/// \p SuccessMemorder.
+/// @param Scope (optional) The synchronization scope (domain of threads
+/// where this access has to be atomic, e.g. CUDA
+/// warp/block/grid-level atomics) of this access. Defaults
+/// to system scope.
+/// @param DataTy (optional) Type of the value to be accessed. cmpxchg
+/// supports integers and pointers only. If any other type or
+/// omitted, type-puns to an integer that holds at least \p
+/// DataSize bytes.
+/// @param PrevPtr (optional) The value that \p Ptr had before the exchange
+/// is stored here.
+/// @param DataSize Number of bytes to be exchanged.
+/// @param AvailableSize The total size that can be used for the atomic
+/// operation. It may include trailing padding in addition to
+/// the data type's size to allow the use of power-of-two
+/// instructions/calls.
+/// @param Align (optional) Known alignment of \p Ptr. If omitted,
+/// alignment is inferred from \p Ptr itself and falls back
+/// to no alignment.
+/// @param Builder Used to emit instructions.
+/// @param DL The target's data layout.
+/// @param TLI The target's libcall library availability.
+/// @param TL (optional) Used to determine which instructions the
+/// target support. If omitted, assumes all accesses up to a
+/// size of 16 bytes are supported.
+/// @param SyncScopes Available scopes for the target. Only needed if \p Scope
+/// is not a constant.
+/// @param FallbackScope Fallback scope if \p Scope is not an available scope.
+/// @param AllowInstruction Whether a 'cmpxchg' can be emitted. False is used by
+/// AtomicExpandPass that replaces cmpxchg instructions not
+/// supported by the target.
+/// @param AllowSwitch If one of IsWeak,SuccessMemorder,FailureMemorder,Scope is
+/// not a constant, allow emitting a switch for each possible
+/// value since cmpxchg only allows constant arguments for
+/// these.
+/// @param AllowSizedLibcall Allow emitting calls to __atomic_compare_exchange_n
+/// libcall functions.
+///
+/// @return A boolean value that indicates whether the exchange has happened
+/// (true) or not (false).
+Value *emitAtomicCompareExchangeBuiltin(
+ Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ std::variant<Value *, bool> IsWeak, bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
+ Type *DataTy, std::optional<uint64_t> DataSize,
+ std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+ IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, bool AllowInstruction = true,
+ bool AllowSwitch = true, bool AllowSizedLibcall = true);
+
+Value *emitAtomicCompareExchangeBuiltin(
+ Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ std::variant<Value *, bool> Weak, bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
+ Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
+ std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+ IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const TargetLowering *TL, bool AllowInstruction = true,
+ bool AllowSwitch = true, bool AllowSizedLibcall = true);
+
+} // namespace llvm
+
+#endif /* LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H */
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 429d6a2e05236f..82f70983c9acb6 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -192,6 +192,25 @@ namespace llvm {
Value *emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, IRBuilderBase &B,
const TargetLibraryInfo *TLI);
+ /// Emit a call to the __atomic_compare_exchange function.
+ /// Defined here: https://llvm.org/docs/Atomics.html#libcalls-atomic,
+ /// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#list_of_library_routines
+ /// (Different signature than the builtins defined here:
+ /// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics)
+ Value *emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
+ Value *Desired, Value *SuccessMemorder,
+ Value *FailureMemorder, IRBuilderBase &B,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
+ /// Variant of __atomic_compare_exchange where \p Size is either 1, 2, 4, 8,
+ /// or 16.
+ Value *emitAtomicCompareExchangeN(int Size, Value *Ptr, Value *Expected,
+ Value *Desired, Value *SuccessMemorder,
+ Value *FailureMemorder, IRBuilderBase &B,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
/// Emit a call to the unary function named 'Name' (e.g. 'floor'). This
/// function is known to take a single of type matching 'Op' and returns one
/// value with the same type. If 'Op' is a long double, 'l' is added as the
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 5b9a7b0f332205..e907be02adfc22 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -60,6 +60,7 @@ std::string VecDesc::getVectorFunctionABIVariantString() const {
enum FuncArgTypeID : char {
Void = 0, // Must be zero.
Bool, // 8 bits on all targets
+ Int8,
Int16,
Int32,
Int,
@@ -70,6 +71,7 @@ enum FuncArgTypeID : char {
LLong, // 64 bits on all targets.
SizeT, // size_t.
SSizeT, // POSIX ssize_t.
+ Int128,
Flt, // IEEE float.
Dbl, // IEEE double.
LDbl, // Any floating type (TODO: tighten this up).
@@ -202,6 +204,11 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.disableAllFunctions();
TLI.setAvailable(llvm::LibFunc___kmpc_alloc_shared);
TLI.setAvailable(llvm::LibFunc___kmpc_free_shared);
+
+ // FIXME: Some regression tests require this function, even though it is not
+ // supported.
+ TLI.setAvailable(llvm::LibFunc_atomic_compare_exchange);
+
return;
}
@@ -1004,6 +1011,7 @@ static bool matchType(FuncArgTypeID ArgTy, const Type *Ty, unsigned IntBits,
case Void:
return Ty->isVoidTy();
case Bool:
+ case Int8:
return Ty->isIntegerTy(8);
case Int16:
return Ty->isIntegerTy(16);
@@ -1025,6 +1033,8 @@ static bool matchType(FuncArgTypeID ArgTy, const Type *Ty, unsigned IntBits,
case SizeT:
case SSizeT:
return Ty->isIntegerTy(SizeTBits);
+ case Int128:
+ return Ty->isIntegerTy(128);
case Flt:
return Ty->isFloatTy();
case Dbl:
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index ebcf76175a36ba..248b7f0ef2cc0f 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
@@ -51,6 +52,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BuildBuiltins.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
@@ -65,6 +67,7 @@ namespace {
class AtomicExpandImpl {
const TargetLowering *TLI = nullptr;
const DataLayout *DL = nullptr;
+ TargetLibraryInfo *TLII = nullptr;
private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
@@ -120,7 +123,7 @@ class AtomicExpandImpl {
CreateCmpXchgInstFun CreateCmpXchg);
public:
- bool run(Function &F, const TargetMachine *TM);
+ bool run(Function &F, const TargetMachine *TM, TargetLibraryInfo *TLII);
};
class AtomicExpandLegacy : public FunctionPass {
@@ -131,6 +134,8 @@ class AtomicExpandLegacy : public FunctionPass {
initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
}
+ void getAnalysisUsage(AnalysisUsage &) const override;
+
bool runOnFunction(Function &F) override;
};
@@ -203,11 +208,13 @@ static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
-bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
+bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM,
+ TargetLibraryInfo *TLII) {
const auto *Subtarget = TM->getSubtargetImpl(F);
if (!Subtarget->enableAtomicExpand())
return false;
TLI = Subtarget->getTargetLowering();
+ this->TLII = TLII;
DL = &F.getDataLayout();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -349,14 +356,18 @@ bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
return MadeChange;
}
-bool AtomicExpandLegacy::runOnFunction(Function &F) {
+void AtomicExpandLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+bool AtomicExpandLegacy::runOnFunction(Function &F) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
+ auto &&TLIAnalysis = getAnalysis<TargetLibraryInfoWrapperPass>();
auto *TM = &TPC->getTM<TargetMachine>();
AtomicExpandImpl AE;
- return AE.run(F, TM);
+ return AE.run(F, TM, &TLIAnalysis.getTLI(F));
}
FunctionPass *llvm::createAtomicExpandLegacyPass() {
@@ -367,7 +378,8 @@ PreservedAnalyses AtomicExpandPass::run(Function &F,
FunctionAnalysisManager &AM) {
AtomicExpandImpl AE;
- bool Changed = AE.run(F, TM);
+ auto &&TLII = AM.getResult<TargetLibraryAnalysis>(F);
+ bool Changed = AE.run(F, TM, &TLII);
if (!Changed)
return PreservedAnalyses::all();
@@ -1712,18 +1724,48 @@ void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
}
void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
- static const RTLIB::Libcall Libcalls[6] = {
- RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
- RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
- RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
+ Module *M = I->getModule();
+ const DataLayout &DL = M->getDataLayout();
unsigned Size = getAtomicOpSize(I);
+ LLVMContext &Ctx = I->getContext();
+ IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
+ Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
+ const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
- bool expanded = expandAtomicOpToLibcall(
- I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
- I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
- Libcalls);
- if (!expanded)
- report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
+ IRBuilder<> Builder(I);
+
+ Value *Ptr = I->getPointerOperand();
+ Value *Cmp = I->getCompareOperand();
+ Value *Val = I->getNewValOperand();
+
+ AllocaInst *ExpectedPtr = AllocaBuilder.CreateAlloca(Cmp->getType(), nullptr,
+ "cmpxchg.expected.ptr");
+ Builder.CreateStore(Cmp, ExpectedPtr);
+
+ AllocaInst *DesiredPtr = AllocaBuilder.CreateAlloca(Val->getType(), nullptr,
+ "cmpxchg.desired.ptr");
+ Builder.CreateStore(Val, DesiredPtr);
+
+ AllocaInst *PrevPtr =
+ AllocaBuilder.CreateAlloca(Val->getType(), nullptr, "cmpxchg.prev.ptr");
+ Value *SuccessResult = emitAtomicCompareExchangeBuiltin(
+ Ptr, ExpectedPtr, DesiredPtr, I->isWeak(), I->isVolatile(),
+ I->getSuccessOrdering(), I->getFailureOrdering(), I->getSyncScopeID(),
+ PrevPtr, Cmp->getType(), {}, {}, I->getAlign(), Builder, DL, TLII, TLI,
+ {}, {},
+ /*AllowInstruction=*/false, /*AllowSwitch=*/true,
+ /*AllowSizedLibcall=*/true);
+
+ // The final result from the CAS is a pair
+ // {load of 'expected' alloca, bool result from call}
+ Type *FinalResultTy = I->getType();
+ Value *V = PoisonValue::get(FinalResultTy);
+ Value *ExpectedOut = Builder.CreateAlignedLoad(
+ Cmp->getType(), PrevPtr, AllocaAlignment, "cmpxchg.prev.load");
+ V = Builder.CreateInsertValue(V, ExpectedOut, 0);
+ V = Builder.CreateInsertValue(V, SuccessResult, 1);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index d0dfafeaef561f..e5b47e77fdafef 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -44,6 +44,10 @@ static cl::opt<bool> EnableNoTrapAfterNoreturn(
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ if (!MRI) {
+ IsValid = false;
+ return;
+ }
assert(MRI && "Unable to create reg info");
MII.reset(TheTarget.createMCInstrInfo());
assert(MII && "Unable to create instruction info");
diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 3be6f1d4634990..459a2167ca1cb5 100644
--- a/llvm/lib/MC/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -15,10 +15,26 @@
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <vector>
using namespace llvm;
+/// Create a TargetMachine via the registered constructor callback.
+///
+/// Returns nullptr if no constructor is registered, if the callback itself
+/// fails, or if the constructed machine reports that its initialization
+/// failed (see LLVMTargetMachine::initAsmInfo setting IsValid).
+TargetMachine *Target::createTargetMachine(StringRef TT, StringRef CPU,
+                                           StringRef Features,
+                                           const TargetOptions &Options,
+                                           std::optional<Reloc::Model> RM,
+                                           std::optional<CodeModel::Model> CM,
+                                           CodeGenOptLevel OL, bool JIT) const {
+  if (!TargetMachineCtorFn)
+    return nullptr;
+  TargetMachine *Result = TargetMachineCtorFn(*this, Triple(TT), CPU, Features,
+                                              Options, RM, CM, OL, JIT);
+  // An invalid machine must be destroyed, not merely dropped: returning
+  // nullptr while keeping the allocation would leak it.
+  if (Result && !Result->isValid()) {
+    delete Result;
+    return nullptr;
+  }
+  return Result;
+}
+
// Clients are responsible for avoid race conditions in registration.
static Target *FirstTarget = nullptr;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1ef891d1b677a2..6dd3afb56cef88 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -296,8 +296,6 @@ getEffectivePPCCodeModel(const Triple &TT, std::optional<CodeModel::Model> CM,
if (TT.isOSAIX())
return CodeModel::Small;
- assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based.");
-
if (TT.isArch32Bit())
return CodeModel::Small;
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
new file mode 100644
index 00000000000000..7c68d18e93bdb1
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -0,0 +1,587 @@
+//===- BuildBuiltins.cpp - Utility builder for builtins -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions for lowering compiler builtins.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildBuiltins.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+// Integer type matching the target's C 'int', as reported by TLI.
+static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+  unsigned Bits = TLI->getIntSize();
+  return B.getIntNTy(Bits);
+}
+
+// Integer type matching the target's C 'size_t' for the current module.
+static IntegerType *getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+  const Module *Mod = B.GetInsertBlock()->getModule();
+  unsigned Bits = TLI->getSizeTSize(*Mod);
+  return B.getIntNTy(Bits);
+}
+
+/// In order to use one of the sized library calls such as
+/// __atomic_fetch_add_4, the alignment must be sufficient, the size
+/// must be one of the potentially-specialized sizes, and the value
+/// type must actually exist in C on the target (otherwise, the
+/// function wouldn't actually be defined.)
+static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
+                                  const DataLayout &DL) {
+  // Only the specialized byte sizes have __atomic_*_N libcall variants.
+  bool IsSpecializedSize =
+      Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16;
+  // TODO: "LargestSize" approximates "largest integer type expressible in C"
+  // on the target. int128 appears to be available on all 64-bit platforms;
+  // otherwise only up to 64-bit integers exist. Getting this wrong would make
+  // us call a sized libcall that is never defined. A more reliable way of
+  // querying the target's C ABI integer sizes from LLVM would be welcome.
+  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
+  return IsSpecializedSize && Size <= LargestSize && Alignment >= Size;
+}
+
+/// Emit IR implementing __atomic_compare_exchange semantics: atomically
+/// compare *Ptr against *ExpectedPtr and, on match, store *DesiredPtr into
+/// *Ptr. Returns an i1 success flag; the value previously at *Ptr is written
+/// to PrevPtr (if non-null). Depending on which operands are compile-time
+/// constants (memory orders, weak flag, sync scope) and on target limits,
+/// this lowers to a single cmpxchg instruction, a switch over runtime
+/// orderings/scopes dispatching to cmpxchg instructions, a sized
+/// __atomic_compare_exchange_N libcall, or the generic
+/// __atomic_compare_exchange libcall.
+Value *llvm::emitAtomicCompareExchangeBuiltin(
+    Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+    std::variant<Value *, bool> IsWeak, bool IsVolatile,
+    std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+    std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
+    std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
+    Type *DataTy, std::optional<uint64_t> DataSize,
+    std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+    IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+    const TargetLowering *TL,
+    ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+    StringRef FallbackScope, bool AllowInstruction, bool AllowSwitch,
+    bool AllowSizedLibcall) {
+  assert(Ptr->getType()->isPointerTy());
+  assert(ExpectedPtr->getType()->isPointerTy());
+  assert(DesiredPtr->getType()->isPointerTy());
+  assert(TLI);
+
+  LLVMContext &Ctx = Builder.getContext();
+  Function *CurFn = Builder.GetInsertBlock()->getParent();
+
+  unsigned MaxAtomicSizeSupported = 16;
+  if (TL)
+    MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
+
+  // Number of bytes to compare/exchange. If not given explicitly it is
+  // derived from DataTy (which is assumed non-null in that case).
+  uint64_t DataSizeConst;
+  if (DataSize) {
+    DataSizeConst = *DataSize;
+  } else {
+    TypeSize DS = DL.getTypeStoreSize(DataTy);
+    DataSizeConst = DS.getFixedValue();
+  }
+  uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
+  assert(DataSizeConst <= AvailableSizeConst);
+
+#ifndef NDEBUG
+  if (DataTy) {
+    // 'long double' (80-bit extended precision) behaves strange here.
+    // DL.getTypeStoreSize says it is 10 bytes
+    // Clang says it is 12 bytes
+    // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that does
+    // not support floats, so AtomicExpandPass doesn't even know it originally
+    // was an FP80)
+    TypeSize DS = DL.getTypeStoreSize(DataTy);
+    assert(DS.getKnownMinValue() <= DataSizeConst &&
+           "Must access at least all the relevant bits of the data, possibly "
+           "some more for padding");
+  }
+#endif
+
+  Type *BoolTy = Builder.getInt1Ty();
+  Type *IntTy = getIntTy(Builder, TLI);
+
+  // Round the access size up to a power of two (when it still fits the
+  // target's atomic width) so a cmpxchg instruction can be used.
+  uint64_t PreferredSize = PowerOf2Ceil(DataSizeConst);
+  if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
+    PreferredSize = DataSizeConst;
+
+  llvm::Align EffectiveAlign;
+  if (Align) {
+    EffectiveAlign = *Align;
+  } else {
+    // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+    //
+    // The alignment is only optional when parsing textual IR; for in-memory
+    // IR, it is always present. If unspecified, the alignment is assumed to
+    // be equal to the size of the ‘<value>’ type.
+    //
+    // We prefer safety here and assume no alignment, unless
+    // getPointerAlignment() can determine the actual alignment.
+    EffectiveAlign = Ptr->getPointerAlignment(DL);
+  }
+
+  // Only use the original data type if it is compatible with cmpxchg (and sized
+  // libcall function) and matches the preferred size. No type punning needed
+  // for __atomic_compare_exchange which only takes pointers.
+  Type *CoercedTy = nullptr;
+  if (DataTy && DataSizeConst == PreferredSize &&
+      (DataTy->isIntegerTy() || DataTy->isPointerTy()))
+    CoercedTy = DataTy;
+  else if (PreferredSize <= 16)
+    CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+
+  // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
+  // constant, determine the AtomicOrdering for use with the cmpxchg
+  // instruction. Also determines the llvm::Value to be passed to
+  // __atomic_compare_exchange in case cmpxchg is not legal.
+  auto processMemorder = [&](auto MemorderVariant)
+      -> std::pair<std::optional<AtomicOrdering>, Value *> {
+    if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
+      auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
+      return std::make_pair(
+          Memorder,
+          ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
+    }
+    if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
+      auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
+      return std::make_pair(
+          fromCABI(MemorderCABI),
+          ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
+    }
+
+    // Runtime value: still constant-foldable if it is a ConstantInt.
+    auto *MemorderCABI = std::get<Value *>(MemorderVariant);
+    if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
+      uint64_t MOInt = MO->getZExtValue();
+      return std::make_pair(fromCABI(MOInt), MO);
+    }
+
+    return std::make_pair(std::nullopt, MemorderCABI);
+  };
+
+  // Resolve the IsWeak argument analogously: compile-time bool if known.
+  auto processIsWeak =
+      [&](auto WeakVariant) -> std::pair<std::optional<bool>, Value *> {
+    if (std::holds_alternative<bool>(WeakVariant)) {
+      bool IsWeakBool = std::get<bool>(WeakVariant);
+      return std::make_pair(IsWeakBool, Builder.getInt1(IsWeakBool));
+    }
+
+    auto *BoolVal = std::get<Value *>(WeakVariant);
+    if (auto *BoolConst = dyn_cast<ConstantInt>(BoolVal)) {
+      uint64_t IsWeakBool = BoolConst->getZExtValue();
+      return std::make_pair(IsWeakBool != 0, BoolVal);
+    }
+
+    return std::make_pair(std::nullopt, BoolVal);
+  };
+
+  // Resolve the synchronization scope: ID/name forms are always constant.
+  auto processScope = [&](auto ScopeVariant)
+      -> std::pair<std::optional<SyncScope::ID>, Value *> {
+    if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
+      auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
+      return std::make_pair(ScopeID, nullptr);
+    }
+
+    if (std::holds_alternative<StringRef>(ScopeVariant)) {
+      auto ScopeName = std::get<StringRef>(ScopeVariant);
+      SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
+      return std::make_pair(ScopeID, nullptr);
+    }
+
+    auto *IntVal = std::get<Value *>(ScopeVariant);
+    if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
+      uint64_t ScopeVal = InstConst->getZExtValue();
+      return std::make_pair(ScopeVal, IntVal);
+    }
+
+    return std::make_pair(std::nullopt, IntVal);
+  };
+
+  auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
+  auto [SuccessMemorderConst, SuccessMemorderCABI] =
+      processMemorder(SuccessMemorder);
+  auto [FailureMemorderConst, FailureMemorderCABI] =
+      processMemorder(FailureMemorder);
+  auto [ScopeConst, ScopeVal] = processScope(Scope);
+
+  if (SuccessMemorderConst && FailureMemorderConst) {
+    // https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+    //
+    //   [failure_memorder] This memory order cannot be __ATOMIC_RELEASE nor
+    //   __ATOMIC_ACQ_REL. It also cannot be a stronger order than that
+    //   specified by success_memorder.
+    //
+    // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+    //
+    //   Both ordering parameters must be at least monotonic, the failure
+    //   ordering cannot be either release or acq_rel.
+    //
+    // Release/Acquire exception because of test/CodeGen/atomic-ops.c (function
+    // "generalWeakness") regression test.
+    assert(*FailureMemorderConst != AtomicOrdering::Release);
+    assert(*FailureMemorderConst != AtomicOrdering::AcquireRelease);
+    assert(
+        isAtLeastOrStrongerThan(*SuccessMemorderConst, *FailureMemorderConst) ||
+        (*SuccessMemorderConst == AtomicOrdering::Release &&
+         *FailureMemorderConst == AtomicOrdering::Acquire));
+  }
+
+  // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+  //
+  //   The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
+  //   a power of two greater than or equal to eight and less than or equal to a
+  //   target-specific size limit.
+  bool CanUseCmpxchngInst = PreferredSize <= MaxAtomicSizeSupported &&
+                            llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
+  bool CanUseSingleCmpxchngInst = CanUseCmpxchngInst && SuccessMemorderConst &&
+                                  FailureMemorderConst && IsWeakConst &&
+                                  ScopeConst;
+  bool CanUseSizedLibcall =
+      canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL);
+
+  Value *ExpectedVal;
+  Value *DesiredVal;
+
+  // Emit cmpxchg instruction, either as a single instruction, or as a case of a
+  // per-constant switch.
+  auto EmitCmpxchngInst = [&](bool IsWeak, SyncScope::ID Scope,
+                              AtomicOrdering SuccessMemorder,
+                              AtomicOrdering FailureMemorder) {
+    AtomicCmpXchgInst *AtomicInst =
+        Builder.CreateAtomicCmpXchg(Ptr, ExpectedVal, DesiredVal, Align,
+                                    SuccessMemorder, FailureMemorder, Scope);
+    AtomicInst->setName("cmpxchg.pair");
+    AtomicInst->setAlignment(EffectiveAlign);
+    AtomicInst->setWeak(IsWeak);
+    AtomicInst->setVolatile(IsVolatile);
+
+    if (PrevPtr) {
+      Value *PreviousVal =
+          Builder.CreateExtractValue(AtomicInst, /*Idxs=*/0, "cmpxchg.prev");
+      Builder.CreateStore(PreviousVal, PrevPtr);
+    }
+
+    Value *SuccessFailureVal =
+        Builder.CreateExtractValue(AtomicInst, /*Idxs=*/1, "cmpxchg.success");
+
+    assert(SuccessFailureVal->getType()->isIntegerTy(1));
+    return SuccessFailureVal;
+  };
+
+  // Fast path: everything constant, emit exactly one cmpxchg.
+  if (CanUseSingleCmpxchngInst && AllowInstruction) {
+    // FIXME: Need to get alignment correct
+    ExpectedVal =
+        Builder.CreateLoad(CoercedTy, ExpectedPtr, "cmpxchg.expected");
+    DesiredVal = Builder.CreateLoad(CoercedTy, DesiredPtr, "cmpxchg.desired");
+    return EmitCmpxchngInst(*IsWeakConst, *ScopeConst, *SuccessMemorderConst,
+                            *FailureMemorderConst);
+  }
+
+  // Switching only needed for cmpxchg instruction which requires constant
+  // arguments.
+  // FIXME: If AtomicExpandPass later considers the cmpxchg not lowerable for
+  // the given target, it will also generate a call to the
+  // __atomic_compare_exchange function. In that case the switching was very
+  // unnecessary but cannot be undone.
+  if (CanUseCmpxchngInst && AllowSwitch && AllowInstruction) {
+    auto createBasicBlock = [&](const Twine &Name) {
+      return BasicBlock::Create(Ctx, Name, CurFn);
+    };
+
+    ExpectedVal =
+        Builder.CreateLoad(CoercedTy, ExpectedPtr, "cmpxchg.expected");
+    DesiredVal = Builder.CreateLoad(CoercedTy, DesiredPtr, "cmpxchg.desired");
+
+    // Innermost switch layer: dispatch over the (runtime) failure ordering.
+    auto GenFailureMemorderSwitch =
+        [&](bool IsWeak, SyncScope::ID Scope,
+            AtomicOrdering SuccessMemorder) -> Value * {
+      if (FailureMemorderConst) {
+        // FIXME: (from CGAtomic)
+        // 31.7.2.18: "The failure argument shall not be memory_order_release
+        // nor memory_order_acq_rel". Fallback to monotonic.
+        //
+        // Prior to c++17, "the failure argument shall be no stronger than the
+        // success argument". This condition has been lifted and the only
+        // precondition is 31.7.2.18. Effectively treat this as a DR and skip
+        // language version checks.
+        return EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
+                                *FailureMemorderConst);
+      }
+
+      // Create all the relevant BB's
+      BasicBlock *MonotonicBB = createBasicBlock("monotonic_fail");
+      BasicBlock *AcquireBB = createBasicBlock("acquire_fail");
+      BasicBlock *SeqCstBB = createBasicBlock("seqcst_fail");
+      BasicBlock *ContBB = createBasicBlock("atomic.continue");
+
+      // MonotonicBB is arbitrarily chosen as the default case; in practice,
+      // this doesn't matter unless someone is crazy enough to use something
+      // that doesn't fold to a constant for the ordering.
+      llvm::SwitchInst *SI =
+          Builder.CreateSwitch(FailureMemorderCABI, MonotonicBB);
+      // Implemented as acquire, since it's the closest in LLVM.
+      SI->addCase(
+          Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::consume)),
+          AcquireBB);
+      SI->addCase(
+          Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::acquire)),
+          AcquireBB);
+      SI->addCase(
+          Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::seq_cst)),
+          SeqCstBB);
+
+      // Emit all the different atomics. Each case tracks the block it ends in
+      // (via GetInsertBlock) for use as the PHI predecessor.
+      // NOTE(review): value name "cmpxcgh.success" below is a typo for
+      // "cmpxchg" — cosmetic, affects IR value names only.
+      Builder.SetInsertPoint(MonotonicBB);
+      Value *MonotonicResult = EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
+                                                AtomicOrdering::Monotonic);
+      BasicBlock *MonotonicSourceBB = Builder.GetInsertBlock();
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(AcquireBB);
+      Value *AcquireResult = EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
+                                              AtomicOrdering::Acquire);
+      BasicBlock *AcquireSourceBB = Builder.GetInsertBlock();
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(SeqCstBB);
+      Value *SeqCstResult =
+          EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
+                           AtomicOrdering::SequentiallyConsistent);
+      BasicBlock *SeqCstSourceBB = Builder.GetInsertBlock();
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(ContBB);
+      PHINode *Result = Builder.CreatePHI(BoolTy, 3, "cmpxcgh.success");
+      Result->addIncoming(MonotonicResult, MonotonicSourceBB);
+      Result->addIncoming(AcquireResult, AcquireSourceBB);
+      Result->addIncoming(SeqCstResult, SeqCstSourceBB);
+      return Result;
+    };
+
+    // Middle switch layer: dispatch over the (runtime) success ordering.
+    auto GenSuccessMemorderSwitch = [&](bool IsWeak,
+                                        SyncScope::ID Scope) -> Value * {
+      if (SuccessMemorderConst)
+        return GenFailureMemorderSwitch(IsWeak, Scope, *SuccessMemorderConst);
+
+      // Create all the relevant BB's
+      BasicBlock *MonotonicBB = createBasicBlock("monotonic");
+      BasicBlock *AcquireBB = createBasicBlock("acquire");
+      BasicBlock *ReleaseBB = createBasicBlock("release");
+      BasicBlock *AcqRelBB = createBasicBlock("acqrel");
+      BasicBlock *SeqCstBB = createBasicBlock("seqcst");
+      BasicBlock *ContBB = createBasicBlock("atomic.continue");
+
+      // Create the switch for the split
+      // MonotonicBB is arbitrarily chosen as the default case; in practice,
+      // this doesn't matter unless someone is crazy enough to use something
+      // that doesn't fold to a constant for the ordering.
+      Value *Order = Builder.CreateIntCast(SuccessMemorderCABI,
+                                           Builder.getInt32Ty(), false);
+      llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
+
+      Builder.SetInsertPoint(ContBB);
+      PHINode *Result = Builder.CreatePHI(BoolTy, 5, "cmpxcgh.success");
+
+      // Emit all the different atomics
+      // NOTE(review): the switch-case targets and PHI incoming blocks below
+      // are inconsistent. After GenFailureMemorderSwitch the insert point may
+      // be a nested continuation block, so Builder.GetInsertBlock() is not the
+      // case entry block; switch cases should target the entry blocks
+      // (AcquireBB, ReleaseBB, ...), while PHI incomings should name the block
+      // that actually branches to ContBB (GetInsertBlock *before* CreateBr).
+      // E.g. the Monotonic incoming uses MonotonicBB and the Acquire incoming
+      // uses AcquireBB even when the branch to ContBB comes from a deeper
+      // block, and several addCase calls pass GetInsertBlock. Verify each
+      // case before relying on the non-constant success-order path.
+      Builder.SetInsertPoint(MonotonicBB);
+      Value *MonotonicResult =
+          GenFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Monotonic);
+      Result->addIncoming(MonotonicResult, MonotonicBB);
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(AcquireBB);
+      Value *AcquireResult =
+          GenFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Acquire);
+      Builder.CreateBr(ContBB);
+      SI->addCase(
+          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
+          Builder.GetInsertBlock());
+      SI->addCase(
+          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
+          Builder.GetInsertBlock());
+      Result->addIncoming(AcquireResult, AcquireBB);
+
+      Builder.SetInsertPoint(ReleaseBB);
+      Value *ReleaseResult =
+          GenFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Release);
+      Builder.CreateBr(ContBB);
+      SI->addCase(
+          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
+          Builder.GetInsertBlock());
+      Result->addIncoming(ReleaseResult, Builder.GetInsertBlock());
+
+      Builder.SetInsertPoint(AcqRelBB);
+      Value *AcqRelResult = GenFailureMemorderSwitch(
+          IsWeak, Scope, AtomicOrdering::AcquireRelease);
+      Builder.CreateBr(ContBB);
+      SI->addCase(
+          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
+          AcqRelBB);
+      Result->addIncoming(AcqRelResult, Builder.GetInsertBlock());
+
+      Builder.SetInsertPoint(SeqCstBB);
+      Value *SeqCstResult = GenFailureMemorderSwitch(
+          IsWeak, Scope, AtomicOrdering::SequentiallyConsistent);
+      Builder.CreateBr(ContBB);
+      SI->addCase(
+          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
+          SeqCstBB);
+      Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+
+      Builder.SetInsertPoint(Result->getNextNode());
+      return Result;
+    };
+
+    // Outer switch layer: dispatch over a runtime synchronization scope using
+    // the target-provided SyncScopes table.
+    auto GenScopeSwitch = [&](bool IsWeak) -> Value * {
+      if (ScopeConst)
+        return GenSuccessMemorderSwitch(IsWeak, *ScopeConst);
+
+      // Handle non-constant scope.
+      DenseMap<unsigned, BasicBlock *> BB;
+      for (const auto &S : SyncScopes) {
+        if (FallbackScope == S.second)
+          continue; // always the default case
+        BB[S.first] = createBasicBlock(Twine("cmpxchg.scope.") + S.second);
+      }
+
+      BasicBlock *DefaultBB = createBasicBlock("atomic.scope.fallback");
+      BasicBlock *ContBB = createBasicBlock("atomic.scope.continue");
+
+      Builder.SetInsertPoint(ContBB);
+      PHINode *Result =
+          Builder.CreatePHI(BoolTy, SyncScopes.size() + 1, "cmpxchg.success");
+
+      Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
+                                        /*IsSigned*/ false,
+                                        "atomic.cmpxchg.scope.cast");
+      // If unsupported synch scope is encountered at run time, assume a
+      // fallback synch scope value.
+      SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
+      for (const auto &S : SyncScopes) {
+        BasicBlock *B = BB[S.first];
+        SI->addCase(Builder.getInt32(S.first), B);
+
+        Builder.SetInsertPoint(B);
+        SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
+        Value *SyncResult = GenSuccessMemorderSwitch(IsWeak, SyncScopeID);
+        Result->addIncoming(SyncResult, Builder.GetInsertBlock());
+        Builder.CreateBr(ContBB);
+      }
+
+      Builder.SetInsertPoint(DefaultBB);
+      SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
+      Value *DefaultResult = GenSuccessMemorderSwitch(IsWeak, SyncScopeID);
+      Result->addIncoming(DefaultResult, Builder.GetInsertBlock());
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(Result->getNextNode());
+      return Result;
+    };
+
+    // Outermost layer: dispatch over a runtime weak/strong flag.
+    auto GenWeakSwitch = [&]() -> Value * {
+      if (IsWeakConst)
+        return GenScopeSwitch(*IsWeakConst);
+
+      // Create all the relevant BB's
+      BasicBlock *StrongBB = createBasicBlock("cmpxchg.strong");
+      BasicBlock *WeakBB = createBasicBlock("cmpxchg.weak");
+      BasicBlock *ContBB = createBasicBlock("cmpxchg.continue");
+
+      // FIXME: Why is this a switch?
+      llvm::SwitchInst *SI = Builder.CreateSwitch(IsWeakVal, WeakBB);
+      SI->addCase(Builder.getInt1(false), StrongBB);
+
+      Builder.SetInsertPoint(StrongBB);
+      Value *StrongResult = GenScopeSwitch(false);
+      BasicBlock *StrongSourceBB = Builder.GetInsertBlock();
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(WeakBB);
+      Value *WeakResult = GenScopeSwitch(true);
+      BasicBlock *WeakSourceBB = Builder.GetInsertBlock();
+      Builder.CreateBr(ContBB);
+
+      Builder.SetInsertPoint(ContBB);
+      PHINode *Result = Builder.CreatePHI(BoolTy, 2, "cmpxchg.isweak.success");
+      Result->addIncoming(WeakResult, WeakSourceBB);
+      Result->addIncoming(StrongResult, StrongSourceBB);
+      return Result;
+    };
+
+    return GenWeakSwitch();
+  }
+
+  // Fallback to a libcall function. From here on IsWeak/IsVolatile is ignored.
+  // IsWeak is assumed to be false and volatile does not apply to function
+  // calls.
+
+  // FIXME: Some AMDGCN regression tests use a non-zero address space, but
+  // __atomic_compare_exchange by definition operates on addrspace(0)
+  // pointers and emitAtomicCompareExchange will complain about it.
+  if (Ptr->getType()->getPointerAddressSpace())
+    return Builder.getInt1(false);
+
+  assert(ScopeConst && *ScopeConst == SyncScope::System && !ScopeVal &&
+         "Synchronization scopes not supported by libcall functions");
+
+  if (CanUseSizedLibcall && AllowSizedLibcall) {
+    LoadInst *DesiredVal =
+        Builder.CreateLoad(CoercedTy, DesiredPtr, "cmpxchg.desired");
+    Value *SuccessResult = emitAtomicCompareExchangeN(
+        PreferredSize, Ptr, ExpectedPtr, DesiredVal, SuccessMemorderCABI,
+        FailureMemorderCABI, Builder, DL, TLI);
+    if (SuccessResult) {
+      // NOTE(review): this assert is redundant inside the SuccessResult check.
+      assert(SuccessResult && "Must be able to emit libcall functions");
+      // The libcall returns a C 'bool' (i8); compare against 0 to get an i1.
+      // NOTE(review): ICMP_EQ with 0 means "success iff the call returned
+      // false" — confirm against the libcall's return convention (GCC
+      // documents __atomic_compare_exchange_n as returning true on success).
+      Value *SuccessBool =
+          Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
+                            Builder.getInt8(0), "cmpxchg.success");
+
+      // On failure the libcall wrote the current value into *ExpectedPtr;
+      // propagate it to the caller's PrevPtr if that is a distinct location.
+      if (PrevPtr && PrevPtr != ExpectedPtr)
+        Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
+      return SuccessBool;
+    }
+
+    // emitAtomicCompareExchangeN can return nullptr if the backend does not
+    // support sized libcalls. Fall back to the non-sized libcall and remove the
+    // unused load again.
+    DesiredVal->eraseFromParent();
+  }
+
+  // FIXME: emitAtomicCompareExchange may fail if a function declaration with
+  // the same name but different signature has already been emitted. Since the
+  // function name starts with "__", i.e. is reserved for use by the compiler,
+  // this should not happen.
+  // It may also fail if the target's TargetLibraryInfo claims that
+  // __atomic_compare_exchange is not supported. In either case there is no
+  // fallback for atomics not supported by the target and we have to crash.
+  Value *SuccessResult = emitAtomicCompareExchange(
+      ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst), Ptr,
+      ExpectedPtr, DesiredPtr, SuccessMemorderCABI, FailureMemorderCABI,
+      Builder, DL, TLI);
+  if (!SuccessResult)
+    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
+
+  Value *SuccessBool =
+      Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
+                        Builder.getInt8(0), "cmpxchg.success");
+
+  if (PrevPtr && PrevPtr != ExpectedPtr)
+    Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
+  return SuccessBool;
+}
+
+/// Convenience overload without an explicit synchronization scope: forwards to
+/// the full overload using the system-wide scope and no per-target scope
+/// table.
+Value *llvm::emitAtomicCompareExchangeBuiltin(
+    Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+    std::variant<Value *, bool> Weak, bool IsVolatile,
+    std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+    std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
+    Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
+    std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+    IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+    const TargetLowering *TL, bool AllowInstruction, bool AllowSwitch,
+    bool AllowSizedLibcall) {
+  return emitAtomicCompareExchangeBuiltin(
+      Ptr, ExpectedPtr, DesiredPtr, Weak, IsVolatile, SuccessMemorder,
+      FailureMemorder, /*Scope=*/SyncScope::System, PrevPtr, DataTy, DataSize,
+      AvailableSize, Align, Builder, DL, TLI, TL, /*SyncScopes=*/{},
+      /*FallbackScope=*/StringRef(), AllowInstruction, AllowSwitch,
+      AllowSizedLibcall);
+}
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 30a343b2c564e8..d317777e3fdf29 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1251,6 +1251,11 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyWritesMemory(F);
Changed |= setWillReturn(F);
break;
+ case LibFunc_atomic_compare_exchange:
+ Changed |= setArgsNoUndef(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotThrow(F);
+ break;
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
@@ -1348,6 +1353,23 @@ FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
setArgExtAttr(*F, 2, TLI);
break;
+ case LibFunc_atomic_compare_exchange:
+ setRetExtAttr(*F, TLI); // return
+ setArgExtAttr(*F, 4, TLI); // SuccessMemorder
+ setArgExtAttr(*F, 5, TLI); // FailureMemorder
+ break;
+
+ case LibFunc_atomic_compare_exchange_1:
+ case LibFunc_atomic_compare_exchange_2:
+ case LibFunc_atomic_compare_exchange_4:
+ case LibFunc_atomic_compare_exchange_8:
+ case LibFunc_atomic_compare_exchange_16:
+ setRetExtAttr(*F, TLI); // return
+ setArgExtAttr(*F, 2, TLI); // Desired
+ setArgExtAttr(*F, 3, TLI); // SuccessMemorder
+ setArgExtAttr(*F, 4, TLI); // FailureMemorder
+ break;
+
// These are functions that are known to not need any argument extension
// on any target: A size_t argument (which may be an i32 on some targets)
// should not trigger the assert below.
@@ -1703,6 +1725,58 @@ Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
{Dest, Fmt, VAList}, B, TLI);
}
+/// Emit a call to the generic __atomic_compare_exchange libcall, which takes
+/// the object size at runtime and operates through pointers. The libcall
+/// returns a C 'bool', lowered here as i8.
+Value *llvm::emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
+                                       Value *Desired, Value *SuccessMemorder,
+                                       Value *FailureMemorder, IRBuilderBase &B,
+                                       const DataLayout &DL,
+                                       const TargetLibraryInfo *TLI) {
+  Type *BoolTy = B.getInt8Ty(); // C 'bool' return value
+  Type *SizeTTy = getSizeTTy(B, TLI);
+  Type *PtrTy = B.getPtrTy();
+  Type *IntTy = getIntTy(B, TLI); // C 'int' memory-order arguments
+  return emitLibCall(
+      LibFunc_atomic_compare_exchange, BoolTy,
+      {SizeTTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy},
+      {Size, Ptr, Expected, Desired, SuccessMemorder, FailureMemorder}, B, TLI);
+}
+
+/// Emit a call to a sized __atomic_compare_exchange_<Size> libcall (Size in
+/// bytes, one of 1/2/4/8/16). Returns nullptr when no specialized variant
+/// exists for Size, or when emitLibCall refuses (e.g. TargetLibraryInfo says
+/// the function is unavailable) — the caller then falls back to the generic,
+/// size_t-taking libcall.
+Value *llvm::emitAtomicCompareExchangeN(int Size, Value *Ptr, Value *Expected,
+                                        Value *Desired, Value *SuccessMemorder,
+                                        Value *FailureMemorder,
+                                        IRBuilderBase &B, const DataLayout &DL,
+                                        const TargetLibraryInfo *TLI) {
+  // Pick the libcall matching the access width.
+  LibFunc TheLibFunc;
+  if (Size == 1)
+    TheLibFunc = LibFunc_atomic_compare_exchange_1;
+  else if (Size == 2)
+    TheLibFunc = LibFunc_atomic_compare_exchange_2;
+  else if (Size == 4)
+    TheLibFunc = LibFunc_atomic_compare_exchange_4;
+  else if (Size == 8)
+    TheLibFunc = LibFunc_atomic_compare_exchange_8;
+  else if (Size == 16)
+    TheLibFunc = LibFunc_atomic_compare_exchange_16;
+  else
+    return nullptr; // No specialized variant for this size.
+
+  Type *BoolTy = B.getInt8Ty(); // C 'bool' return value
+  Type *PtrTy = B.getPtrTy();
+  Type *ValTy = B.getIntNTy(Size * 8); // Desired is passed by value
+  Type *IntTy = getIntTy(B, TLI);      // C 'int' memory-order arguments
+  return emitLibCall(TheLibFunc, BoolTy, {PtrTy, PtrTy, ValTy, IntTy, IntTy},
+                     {Ptr, Expected, Desired, SuccessMemorder, FailureMemorder},
+                     B, TLI);
+}
+
/// Append a suffix to the function name according to the type of 'Op'.
static void appendTypeSuffix(Value *Op, StringRef &Name,
SmallString<20> &NameBuffer) {
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 51e8821773c3af..f8087466b53db9 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_component_library(LLVMTransformUtils
AssumeBundleBuilder.cpp
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
+ BuildBuiltins.cpp
BuildLibCalls.cpp
BypassSlowDivision.cpp
CallPromotionUtils.cpp
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index 88061756d8feeb..4aca464d7972d6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -1122,6 +1122,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1134,6 +1135,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1146,6 +1148,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1158,6 +1161,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1170,6 +1174,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1182,6 +1187,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1194,6 +1200,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1206,6 +1213,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1218,6 +1226,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1230,6 +1239,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1242,6 +1252,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1254,6 +1265,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1266,6 +1278,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1278,6 +1291,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1290,6 +1304,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1303,6 +1318,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1317,6 +1333,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1331,6 +1348,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1345,6 +1363,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1887,6 +1907,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1899,6 +1920,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1911,6 +1933,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1923,6 +1946,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1935,6 +1959,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1947,6 +1972,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1959,6 +1985,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1971,6 +1998,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1983,6 +2011,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1995,6 +2024,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2007,6 +2037,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2019,6 +2050,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2031,6 +2063,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2043,6 +2076,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2055,6 +2089,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2067,6 +2102,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2080,6 +2116,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2093,6 +2130,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2106,6 +2144,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2119,6 +2158,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2657,6 +2697,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2669,6 +2710,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2681,6 +2723,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2693,6 +2736,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2705,6 +2749,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2717,6 +2762,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2729,6 +2775,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2741,6 +2788,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2753,6 +2801,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2765,6 +2814,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2777,6 +2827,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2789,6 +2840,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2801,6 +2853,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2813,6 +2866,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2825,6 +2879,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2838,11 +2893,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2853,11 +2909,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2868,11 +2925,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2883,11 +2941,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2898,11 +2957,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3508,6 +3568,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3522,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3536,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3550,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3564,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3578,6 +3643,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3592,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3606,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3620,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3634,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3648,6 +3718,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3662,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3676,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3690,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3704,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3720,11 +3795,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3739,11 +3815,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3758,11 +3835,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3777,11 +3855,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3796,11 +3875,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4337,6 +4417,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4349,6 +4430,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4361,6 +4443,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4373,6 +4456,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4385,6 +4469,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4397,6 +4482,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4409,6 +4495,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4421,6 +4508,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4433,6 +4521,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4445,6 +4534,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4457,6 +4547,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4469,6 +4560,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4481,6 +4573,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4493,6 +4586,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4505,6 +4599,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4518,11 +4613,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4533,11 +4629,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4548,11 +4645,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4563,11 +4661,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4578,11 +4677,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5117,6 +5217,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5129,6 +5230,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5141,6 +5243,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5153,6 +5256,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5165,6 +5269,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5177,6 +5282,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5189,6 +5295,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5201,6 +5308,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5213,6 +5321,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5225,6 +5334,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5237,6 +5347,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5249,6 +5360,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5261,6 +5373,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5273,6 +5386,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5285,6 +5399,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5298,11 +5413,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5313,11 +5429,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5328,11 +5445,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5343,11 +5461,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5358,11 +5477,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6014,6 +6134,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6030,6 +6151,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6046,6 +6168,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6062,6 +6185,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6078,6 +6202,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6093,6 +6218,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6107,6 +6233,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6121,6 +6248,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6135,6 +6263,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6149,6 +6278,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6163,6 +6293,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6177,6 +6308,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6191,6 +6323,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6205,6 +6338,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6219,6 +6353,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6261,6 +6397,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6283,6 +6420,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6305,6 +6443,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6327,6 +6466,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6984,6 +7124,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7000,6 +7141,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7016,6 +7158,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7032,6 +7175,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7048,6 +7192,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7063,6 +7208,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7077,6 +7223,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7091,6 +7238,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7105,6 +7253,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7119,6 +7268,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7133,6 +7283,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7147,6 +7298,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7161,6 +7313,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7175,6 +7328,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7189,6 +7343,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7209,6 +7364,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7231,6 +7387,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7253,6 +7410,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7275,6 +7433,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7297,6 +7456,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7948,6 +8108,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7963,6 +8124,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7978,6 +8140,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7993,6 +8156,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8008,6 +8172,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8023,6 +8188,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8037,6 +8203,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8051,6 +8218,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8065,6 +8233,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8079,6 +8248,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8093,6 +8263,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8107,6 +8278,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8121,6 +8293,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8135,6 +8308,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8149,6 +8323,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8169,6 +8344,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8191,6 +8367,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8213,6 +8390,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8235,6 +8413,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8257,6 +8436,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8908,6 +9088,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8923,6 +9104,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8938,6 +9120,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8953,6 +9136,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8968,6 +9152,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8983,6 +9168,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8997,6 +9183,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9011,6 +9198,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9025,6 +9213,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9039,6 +9228,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9053,6 +9243,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9067,6 +9258,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9081,6 +9273,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9095,6 +9288,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9109,6 +9303,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9129,6 +9324,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9151,6 +9347,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9173,6 +9370,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9195,6 +9393,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9217,6 +9416,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index a1712a5ec7a27c..bcc57e77d693e1 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -647,6 +647,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -659,6 +660,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -671,6 +673,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -683,6 +686,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -695,6 +699,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -707,6 +712,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -719,6 +725,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -731,6 +738,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -743,6 +751,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -755,6 +764,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -767,6 +777,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -779,6 +790,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -791,6 +803,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -803,6 +816,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -815,6 +829,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -828,6 +843,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -842,6 +858,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -856,6 +873,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -870,6 +888,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -884,6 +903,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1167,6 +1187,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1179,6 +1200,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1191,6 +1213,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1203,6 +1226,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1215,6 +1239,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1227,6 +1252,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1239,6 +1265,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1251,6 +1278,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1263,6 +1291,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1275,6 +1304,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1287,6 +1317,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1299,6 +1330,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1311,6 +1343,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1323,6 +1356,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1335,6 +1369,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1347,6 +1382,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1360,6 +1396,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1373,6 +1410,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1386,6 +1424,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1399,6 +1438,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1682,6 +1722,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1694,6 +1735,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1706,6 +1748,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1718,6 +1761,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1730,6 +1774,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1742,6 +1787,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1754,6 +1800,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1766,6 +1813,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1778,6 +1826,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1790,6 +1839,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1802,6 +1852,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1814,6 +1865,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1826,6 +1878,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1838,6 +1891,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1850,6 +1904,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1863,11 +1918,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -1878,11 +1934,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -1893,11 +1950,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -1908,11 +1966,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -1923,11 +1982,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2473,6 +2533,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2487,6 +2548,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2501,6 +2563,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2515,6 +2578,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2529,6 +2593,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2543,6 +2608,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2557,6 +2623,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2571,6 +2638,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2585,6 +2653,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2599,6 +2668,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2613,6 +2683,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2627,6 +2698,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2641,6 +2713,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2655,6 +2728,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2669,6 +2743,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2685,11 +2760,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2704,11 +2780,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2723,11 +2800,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2742,11 +2820,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2761,11 +2840,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3002,6 +3082,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3014,6 +3095,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3026,6 +3108,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3038,6 +3121,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3050,6 +3134,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3062,6 +3147,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3074,6 +3160,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3086,6 +3173,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3098,6 +3186,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3110,6 +3199,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3122,6 +3212,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3134,6 +3225,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3146,6 +3238,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3158,6 +3251,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3170,6 +3264,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3183,11 +3278,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3198,11 +3294,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3213,11 +3310,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3228,11 +3326,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3243,11 +3342,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3537,6 +3637,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3549,6 +3650,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3561,6 +3663,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3573,6 +3676,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3585,6 +3689,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3597,6 +3702,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3609,6 +3715,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3621,6 +3728,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3633,6 +3741,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3645,6 +3754,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3657,6 +3767,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3669,6 +3780,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3681,6 +3793,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3693,6 +3806,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3705,6 +3819,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3718,11 +3833,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3733,11 +3849,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3748,11 +3865,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3763,11 +3881,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3778,11 +3897,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4109,6 +4229,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4125,6 +4246,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4141,6 +4263,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4157,6 +4280,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4173,6 +4297,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4188,6 +4313,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4202,6 +4328,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4216,6 +4343,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4230,6 +4358,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4244,6 +4373,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4258,6 +4388,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4272,6 +4403,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4286,6 +4418,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4300,6 +4433,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4314,6 +4448,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4334,6 +4469,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4356,6 +4492,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4378,6 +4515,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4400,6 +4538,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4422,6 +4561,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4754,6 +4894,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4770,6 +4911,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4786,6 +4928,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4802,6 +4945,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4818,6 +4962,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4833,6 +4978,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4847,6 +4993,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4861,6 +5008,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4875,6 +5023,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4889,6 +5038,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4903,6 +5053,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4917,6 +5068,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4931,6 +5083,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4945,6 +5098,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4959,6 +5113,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4979,6 +5134,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5001,6 +5157,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5023,6 +5180,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5045,6 +5203,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5067,6 +5226,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5398,6 +5558,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5413,6 +5574,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5428,6 +5590,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5443,6 +5606,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5458,6 +5622,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5473,6 +5638,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5487,6 +5653,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5501,6 +5668,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5515,6 +5683,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5529,6 +5698,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5543,6 +5713,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5557,6 +5728,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5571,6 +5743,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5585,6 +5758,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5599,6 +5773,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5619,6 +5794,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5641,6 +5817,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5663,6 +5840,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5685,6 +5863,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5707,6 +5886,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6038,6 +6218,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6053,6 +6234,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6068,6 +6250,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6083,6 +6266,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6098,6 +6282,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6113,6 +6298,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6127,6 +6313,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6141,6 +6328,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6155,6 +6343,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6169,6 +6358,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6183,6 +6373,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6197,6 +6388,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6211,6 +6403,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6225,6 +6418,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6239,6 +6433,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6259,6 +6454,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6281,6 +6477,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6303,6 +6500,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6325,6 +6523,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6347,6 +6546,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index e9b096e8c6c44b..150306ecb73a5c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -662,6 +662,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -674,6 +675,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -686,6 +688,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -698,6 +701,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -710,6 +714,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -722,6 +727,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -734,6 +740,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -746,6 +753,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -758,6 +766,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -770,6 +779,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -782,6 +792,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -794,6 +805,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -806,6 +818,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -818,6 +831,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -830,6 +844,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -843,6 +858,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -857,6 +873,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -871,6 +888,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -885,6 +903,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -899,6 +918,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1272,6 +1292,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1284,6 +1305,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1296,6 +1318,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1308,6 +1331,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1320,6 +1344,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1332,6 +1357,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1344,6 +1370,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1356,6 +1383,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1368,6 +1396,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1380,6 +1409,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1392,6 +1422,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1404,6 +1435,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1416,6 +1448,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1428,6 +1461,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1440,6 +1474,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1452,6 +1487,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1465,6 +1501,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1478,6 +1515,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1491,6 +1529,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1504,6 +1543,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1912,6 +1952,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1924,6 +1965,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1936,6 +1978,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1948,6 +1991,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1960,6 +2004,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1972,6 +2017,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1984,6 +2030,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1996,6 +2043,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2008,6 +2056,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2020,6 +2069,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2032,6 +2082,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2044,6 +2095,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2056,6 +2108,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2068,6 +2121,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2080,6 +2134,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2093,11 +2148,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2108,11 +2164,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2123,11 +2180,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2138,11 +2196,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2153,11 +2212,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2693,6 +2753,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2707,6 +2768,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2721,6 +2783,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2735,6 +2798,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2749,6 +2813,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2763,6 +2828,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2777,6 +2843,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2791,6 +2858,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2805,6 +2873,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2819,6 +2888,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2833,6 +2903,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2847,6 +2918,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2861,6 +2933,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2875,6 +2948,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2889,6 +2963,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2905,11 +2980,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2924,11 +3000,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2943,11 +3020,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2962,11 +3040,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2981,11 +3060,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3267,6 +3347,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3279,6 +3360,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3291,6 +3373,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3303,6 +3386,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3315,6 +3399,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3327,6 +3412,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3339,6 +3425,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3351,6 +3438,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3363,6 +3451,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3375,6 +3464,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3387,6 +3477,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3399,6 +3490,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3411,6 +3503,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3423,6 +3516,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3435,6 +3529,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3448,11 +3543,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3463,11 +3559,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3478,11 +3575,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3493,11 +3591,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3508,11 +3607,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3792,6 +3892,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3804,6 +3905,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3816,6 +3918,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3828,6 +3931,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3840,6 +3944,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3852,6 +3957,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3864,6 +3970,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3876,6 +3983,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3888,6 +3996,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3900,6 +4009,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3912,6 +4022,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3924,6 +4035,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3936,6 +4048,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3948,6 +4061,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3960,6 +4074,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3973,11 +4088,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3988,11 +4104,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4003,11 +4120,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4018,11 +4136,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4033,11 +4152,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4619,6 +4739,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4635,6 +4756,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4651,6 +4773,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4667,6 +4790,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4683,6 +4807,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4698,6 +4823,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4712,6 +4838,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4726,6 +4853,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4740,6 +4868,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4754,6 +4883,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4768,6 +4898,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4782,6 +4913,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4796,6 +4928,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4810,6 +4943,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4824,6 +4958,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4844,6 +4979,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4866,6 +5002,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4888,6 +5025,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4910,6 +5048,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4932,6 +5071,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5519,6 +5659,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -5535,6 +5676,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -5551,6 +5693,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -5567,6 +5710,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -5583,6 +5727,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -5598,6 +5743,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5612,6 +5758,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5626,6 +5773,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5640,6 +5788,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5654,6 +5803,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5668,6 +5818,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5682,6 +5833,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5696,6 +5848,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5710,6 +5863,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5724,6 +5878,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5744,6 +5899,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5766,6 +5922,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5788,6 +5945,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5810,6 +5968,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5832,6 +5991,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6413,6 +6573,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6428,6 +6589,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6443,6 +6605,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6458,6 +6621,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6473,6 +6637,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6488,6 +6653,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6502,6 +6668,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6516,6 +6683,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6530,6 +6698,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6544,6 +6713,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6558,6 +6728,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6572,6 +6743,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6586,6 +6758,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6600,6 +6773,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6614,6 +6788,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6634,6 +6809,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6656,6 +6832,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6678,6 +6855,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6700,6 +6878,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6722,6 +6901,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7303,6 +7483,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7318,6 +7499,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7333,6 +7515,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7348,6 +7531,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7363,6 +7547,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7378,6 +7563,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7392,6 +7578,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7406,6 +7593,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7420,6 +7608,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7434,6 +7623,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7448,6 +7638,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7462,6 +7653,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7476,6 +7668,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7490,6 +7683,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7504,6 +7698,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7524,6 +7719,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7546,6 +7742,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7568,6 +7765,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7590,6 +7788,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7612,6 +7811,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
index 4f9e520997a22f..18da57f7a82c18 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -1122,6 +1122,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1134,6 +1135,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1146,6 +1148,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1158,6 +1161,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1170,6 +1174,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1182,6 +1187,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1194,6 +1200,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1206,6 +1213,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1218,6 +1226,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1230,6 +1239,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1242,6 +1252,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1254,6 +1265,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1266,6 +1278,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1278,6 +1291,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1290,6 +1304,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1303,6 +1318,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1317,6 +1333,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1331,6 +1348,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1345,6 +1363,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1887,6 +1907,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1899,6 +1920,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1911,6 +1933,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1923,6 +1946,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1935,6 +1959,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1947,6 +1972,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1959,6 +1985,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1971,6 +1998,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1983,6 +2011,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1995,6 +2024,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2007,6 +2037,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2019,6 +2050,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2031,6 +2063,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2043,6 +2076,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2055,6 +2089,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2067,6 +2102,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2080,6 +2116,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2093,6 +2130,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2106,6 +2144,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2119,6 +2158,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2657,6 +2697,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2669,6 +2710,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2681,6 +2723,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2693,6 +2736,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2705,6 +2749,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2717,6 +2762,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2729,6 +2775,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2741,6 +2788,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2753,6 +2801,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2765,6 +2814,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2777,6 +2827,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2789,6 +2840,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2801,6 +2853,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2813,6 +2866,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2825,6 +2879,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2838,11 +2893,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2853,11 +2909,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2868,11 +2925,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2883,11 +2941,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2898,11 +2957,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3508,6 +3568,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3522,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3536,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3550,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3564,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3578,6 +3643,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3592,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3606,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3620,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3634,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3648,6 +3718,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3662,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3676,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3690,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3704,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3720,11 +3795,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3739,11 +3815,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3758,11 +3835,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3777,11 +3855,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3796,11 +3875,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4337,6 +4417,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4349,6 +4430,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4361,6 +4443,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4373,6 +4456,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4385,6 +4469,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4397,6 +4482,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4409,6 +4495,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4421,6 +4508,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4433,6 +4521,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4445,6 +4534,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4457,6 +4547,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4469,6 +4560,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4481,6 +4573,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4493,6 +4586,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4505,6 +4599,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4518,11 +4613,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4533,11 +4629,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4548,11 +4645,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4563,11 +4661,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4578,11 +4677,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5117,6 +5217,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5129,6 +5230,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5141,6 +5243,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5153,6 +5256,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5165,6 +5269,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5177,6 +5282,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5189,6 +5295,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5201,6 +5308,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5213,6 +5321,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5225,6 +5334,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5237,6 +5347,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5249,6 +5360,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5261,6 +5373,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5273,6 +5386,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5285,6 +5399,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5298,11 +5413,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5313,11 +5429,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5328,11 +5445,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5343,11 +5461,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5358,11 +5477,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6014,6 +6134,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6030,6 +6151,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6046,6 +6168,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6062,6 +6185,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6078,6 +6202,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6093,6 +6218,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6107,6 +6233,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6121,6 +6248,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6135,6 +6263,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6149,6 +6278,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6163,6 +6293,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6177,6 +6308,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6191,6 +6323,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6205,6 +6338,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6219,6 +6353,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6261,6 +6397,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6283,6 +6420,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6305,6 +6443,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6327,6 +6466,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6984,6 +7124,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7000,6 +7141,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7016,6 +7158,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7032,6 +7175,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7048,6 +7192,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7063,6 +7208,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7077,6 +7223,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7091,6 +7238,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7105,6 +7253,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7119,6 +7268,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7133,6 +7283,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7147,6 +7298,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7161,6 +7313,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7175,6 +7328,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7189,6 +7343,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7209,6 +7364,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7231,6 +7387,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7253,6 +7410,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7275,6 +7433,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7297,6 +7456,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7948,6 +8108,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7963,6 +8124,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7978,6 +8140,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7993,6 +8156,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8008,6 +8172,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8023,6 +8188,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8037,6 +8203,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8051,6 +8218,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8065,6 +8233,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8079,6 +8248,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8093,6 +8263,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8107,6 +8278,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8121,6 +8293,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8135,6 +8308,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8149,6 +8323,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8169,6 +8344,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8191,6 +8367,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8213,6 +8390,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8235,6 +8413,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8257,6 +8436,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8908,6 +9088,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8923,6 +9104,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8938,6 +9120,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8953,6 +9136,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8968,6 +9152,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8983,6 +9168,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8997,6 +9183,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9011,6 +9198,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9025,6 +9213,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9039,6 +9228,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9053,6 +9243,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9067,6 +9258,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9081,6 +9273,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9095,6 +9288,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9109,6 +9303,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9129,6 +9324,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9151,6 +9347,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9173,6 +9370,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9195,6 +9393,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9217,6 +9416,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 3437ccc8be40d7..9e6941a723f728 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -1122,6 +1122,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1134,6 +1135,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1146,6 +1148,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1158,6 +1161,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1170,6 +1174,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1182,6 +1187,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1194,6 +1200,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1206,6 +1213,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1218,6 +1226,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1230,6 +1239,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1242,6 +1252,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1254,6 +1265,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1266,6 +1278,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1278,6 +1291,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1290,6 +1304,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1303,6 +1318,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1317,6 +1333,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1331,6 +1348,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1345,6 +1363,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1887,6 +1907,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1899,6 +1920,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1911,6 +1933,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1923,6 +1946,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1935,6 +1959,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1947,6 +1972,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1959,6 +1985,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1971,6 +1998,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1983,6 +2011,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1995,6 +2024,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2007,6 +2037,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2019,6 +2050,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2031,6 +2063,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2043,6 +2076,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2055,6 +2089,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2067,6 +2102,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2080,6 +2116,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2093,6 +2130,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2106,6 +2144,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2119,6 +2158,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2657,6 +2697,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2669,6 +2710,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2681,6 +2723,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2693,6 +2736,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2705,6 +2749,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2717,6 +2762,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2729,6 +2775,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2741,6 +2788,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2753,6 +2801,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2765,6 +2814,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2777,6 +2827,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2789,6 +2840,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2801,6 +2853,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2813,6 +2866,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2825,6 +2879,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2838,11 +2893,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2853,11 +2909,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2868,11 +2925,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2883,11 +2941,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2898,11 +2957,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3508,6 +3568,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3522,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3536,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3550,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3564,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3578,6 +3643,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3592,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3606,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3620,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3634,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3648,6 +3718,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3662,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3676,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3690,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3704,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3720,11 +3795,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3739,11 +3815,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3758,11 +3835,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3777,11 +3855,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3796,11 +3875,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4337,6 +4417,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4349,6 +4430,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4361,6 +4443,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4373,6 +4456,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4385,6 +4469,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4397,6 +4482,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4409,6 +4495,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4421,6 +4508,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4433,6 +4521,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4445,6 +4534,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4457,6 +4547,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4469,6 +4560,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4481,6 +4573,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4493,6 +4586,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4505,6 +4599,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4518,11 +4613,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4533,11 +4629,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4548,11 +4645,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4563,11 +4661,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4578,11 +4677,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5117,6 +5217,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5129,6 +5230,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5141,6 +5243,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5153,6 +5256,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5165,6 +5269,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5177,6 +5282,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5189,6 +5295,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5201,6 +5308,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5213,6 +5321,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5225,6 +5334,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5237,6 +5347,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5249,6 +5360,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5261,6 +5373,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5273,6 +5386,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5285,6 +5399,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5298,11 +5413,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5313,11 +5429,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5328,11 +5445,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5343,11 +5461,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5358,11 +5477,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6014,6 +6134,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6030,6 +6151,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6046,6 +6168,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6062,6 +6185,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6078,6 +6202,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6093,6 +6218,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6107,6 +6233,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6121,6 +6248,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6135,6 +6263,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6149,6 +6278,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6163,6 +6293,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6177,6 +6308,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6191,6 +6323,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6205,6 +6338,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6219,6 +6353,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6261,6 +6397,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6283,6 +6420,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6305,6 +6443,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6327,6 +6466,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6984,6 +7124,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7000,6 +7141,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7016,6 +7158,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7032,6 +7175,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7048,6 +7192,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7063,6 +7208,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7077,6 +7223,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7091,6 +7238,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7105,6 +7253,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7119,6 +7268,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7133,6 +7283,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7147,6 +7298,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7161,6 +7313,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7175,6 +7328,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7189,6 +7343,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7209,6 +7364,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7231,6 +7387,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7253,6 +7410,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7275,6 +7433,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7297,6 +7456,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7948,6 +8108,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7963,6 +8124,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7978,6 +8140,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7993,6 +8156,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8008,6 +8172,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8023,6 +8188,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8037,6 +8203,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8051,6 +8218,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8065,6 +8233,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8079,6 +8248,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8093,6 +8263,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8107,6 +8278,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8121,6 +8293,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8135,6 +8308,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8149,6 +8323,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8169,6 +8344,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8191,6 +8367,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8213,6 +8390,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8235,6 +8413,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8257,6 +8436,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8908,6 +9088,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8923,6 +9104,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8938,6 +9120,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8953,6 +9136,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8968,6 +9152,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8983,6 +9168,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8997,6 +9183,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9011,6 +9198,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9025,6 +9213,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9039,6 +9228,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9053,6 +9243,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9067,6 +9258,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9081,6 +9273,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9095,6 +9288,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9109,6 +9303,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9129,6 +9324,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9151,6 +9347,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9173,6 +9370,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9195,6 +9393,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9217,6 +9416,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index ee5fbe39b4492c..b650f8c92df057 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -682,6 +682,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -694,6 +695,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -706,6 +708,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -718,6 +721,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -730,6 +734,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -742,6 +747,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -754,6 +760,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -766,6 +773,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -778,6 +786,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -790,6 +799,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -802,6 +812,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -814,6 +825,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -826,6 +838,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -838,6 +851,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -850,6 +864,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -863,6 +878,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -877,6 +893,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -891,6 +908,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -905,6 +923,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -919,6 +938,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1202,6 +1222,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1214,6 +1235,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1226,6 +1248,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1238,6 +1261,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1250,6 +1274,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1262,6 +1287,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1274,6 +1300,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1286,6 +1313,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1298,6 +1326,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1310,6 +1339,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1322,6 +1352,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1334,6 +1365,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1346,6 +1378,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1358,6 +1391,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1370,6 +1404,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1382,6 +1417,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1395,6 +1431,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1408,6 +1445,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1421,6 +1459,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1434,6 +1473,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1752,6 +1792,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1764,6 +1805,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1776,6 +1818,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1788,6 +1831,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1800,6 +1844,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1812,6 +1857,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1824,6 +1870,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1836,6 +1883,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1848,6 +1896,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1860,6 +1909,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1872,6 +1922,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1884,6 +1935,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1896,6 +1948,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1908,6 +1961,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1920,6 +1974,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1933,11 +1988,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -1948,11 +2004,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -1963,11 +2020,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -1978,11 +2036,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -1993,11 +2052,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2543,6 +2603,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2557,6 +2618,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2571,6 +2633,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2585,6 +2648,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2599,6 +2663,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2613,6 +2678,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2627,6 +2693,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2641,6 +2708,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2655,6 +2723,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2669,6 +2738,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2683,6 +2753,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2697,6 +2768,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2711,6 +2783,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2725,6 +2798,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2739,6 +2813,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2755,11 +2830,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2774,11 +2850,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2793,11 +2870,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2812,11 +2890,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2831,11 +2910,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3127,6 +3207,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3139,6 +3220,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3151,6 +3233,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3163,6 +3246,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3175,6 +3259,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3187,6 +3272,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3199,6 +3285,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3211,6 +3298,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3223,6 +3311,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3235,6 +3324,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3247,6 +3337,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3259,6 +3350,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3271,6 +3363,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3283,6 +3376,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3295,6 +3389,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3308,11 +3403,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3323,11 +3419,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3338,11 +3435,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3353,11 +3451,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3368,11 +3467,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3662,6 +3762,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3674,6 +3775,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3686,6 +3788,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3698,6 +3801,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3710,6 +3814,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3722,6 +3827,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3734,6 +3840,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3746,6 +3853,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3758,6 +3866,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3770,6 +3879,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3782,6 +3892,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3794,6 +3905,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3806,6 +3918,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3818,6 +3931,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3830,6 +3944,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3843,11 +3958,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3858,11 +3974,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3873,11 +3990,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3888,11 +4006,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3903,11 +4022,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4234,6 +4354,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4250,6 +4371,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4266,6 +4388,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4282,6 +4405,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4298,6 +4422,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4313,6 +4438,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4327,6 +4453,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4341,6 +4468,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4355,6 +4483,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4369,6 +4498,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4383,6 +4513,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4397,6 +4528,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4411,6 +4543,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4425,6 +4558,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4439,6 +4573,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4459,6 +4594,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4481,6 +4617,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4503,6 +4640,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4525,6 +4663,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4547,6 +4686,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4879,6 +5019,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4895,6 +5036,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4911,6 +5053,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4927,6 +5070,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4943,6 +5087,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4958,6 +5103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4972,6 +5118,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4986,6 +5133,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5000,6 +5148,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5014,6 +5163,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5028,6 +5178,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5042,6 +5193,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5056,6 +5208,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5070,6 +5223,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5084,6 +5238,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5104,6 +5259,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5126,6 +5282,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5148,6 +5305,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5170,6 +5328,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5192,6 +5351,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5523,6 +5683,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5538,6 +5699,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5553,6 +5715,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5568,6 +5731,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5583,6 +5747,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5598,6 +5763,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5612,6 +5778,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5626,6 +5793,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5640,6 +5808,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5654,6 +5823,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5668,6 +5838,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5682,6 +5853,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5696,6 +5868,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5710,6 +5883,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5724,6 +5898,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5744,6 +5919,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5766,6 +5942,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5788,6 +5965,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5810,6 +5988,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5832,6 +6011,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6163,6 +6343,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6178,6 +6359,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6193,6 +6375,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6208,6 +6391,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6223,6 +6407,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6238,6 +6423,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6252,6 +6438,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6266,6 +6453,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6280,6 +6468,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6294,6 +6483,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6308,6 +6498,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6322,6 +6513,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6336,6 +6528,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6350,6 +6543,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6364,6 +6558,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6384,6 +6579,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6406,6 +6602,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6428,6 +6625,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6450,6 +6648,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6472,6 +6671,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
index 2473147509dc87..866ee991f285e7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
@@ -1122,6 +1122,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1134,6 +1135,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1146,6 +1148,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1158,6 +1161,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1170,6 +1174,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1182,6 +1187,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1194,6 +1200,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1206,6 +1213,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1218,6 +1226,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1230,6 +1239,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1242,6 +1252,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1254,6 +1265,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1266,6 +1278,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1278,6 +1291,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1290,6 +1304,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1303,6 +1318,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1317,6 +1333,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1331,6 +1348,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1345,6 +1363,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1887,6 +1907,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1899,6 +1920,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1911,6 +1933,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1923,6 +1946,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1935,6 +1959,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1947,6 +1972,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1959,6 +1985,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1971,6 +1998,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1983,6 +2011,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1995,6 +2024,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2007,6 +2037,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2019,6 +2050,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2031,6 +2063,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2043,6 +2076,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2055,6 +2089,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2067,6 +2102,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2080,6 +2116,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2093,6 +2130,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2106,6 +2144,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2119,6 +2158,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2657,6 +2697,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2669,6 +2710,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2681,6 +2723,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2693,6 +2736,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2705,6 +2749,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2717,6 +2762,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2729,6 +2775,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2741,6 +2788,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2753,6 +2801,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2765,6 +2814,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2777,6 +2827,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2789,6 +2840,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2801,6 +2853,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2813,6 +2866,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2825,6 +2879,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2838,11 +2893,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2853,11 +2909,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2868,11 +2925,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2883,11 +2941,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2898,11 +2957,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3508,6 +3568,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3522,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3536,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3550,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3564,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3578,6 +3643,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3592,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3606,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3620,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3634,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3648,6 +3718,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3662,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3676,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3690,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3704,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3720,11 +3795,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3739,11 +3815,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3758,11 +3835,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3777,11 +3855,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3796,11 +3875,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4337,6 +4417,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4349,6 +4430,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4361,6 +4443,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4373,6 +4456,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4385,6 +4469,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4397,6 +4482,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4409,6 +4495,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4421,6 +4508,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4433,6 +4521,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4445,6 +4534,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4457,6 +4547,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4469,6 +4560,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4481,6 +4573,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4493,6 +4586,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4505,6 +4599,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4518,11 +4613,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4533,11 +4629,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4548,11 +4645,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4563,11 +4661,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4578,11 +4677,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5117,6 +5217,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5129,6 +5230,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5141,6 +5243,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5153,6 +5256,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5165,6 +5269,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5177,6 +5282,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5189,6 +5295,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5201,6 +5308,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5213,6 +5321,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5225,6 +5334,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5237,6 +5347,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5249,6 +5360,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5261,6 +5373,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5273,6 +5386,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5285,6 +5399,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5298,11 +5413,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5313,11 +5429,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5328,11 +5445,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5343,11 +5461,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5358,11 +5477,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6014,6 +6134,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6030,6 +6151,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6046,6 +6168,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6062,6 +6185,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6078,6 +6202,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6093,6 +6218,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6107,6 +6233,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6121,6 +6248,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6135,6 +6263,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6149,6 +6278,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6163,6 +6293,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6177,6 +6308,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6191,6 +6323,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6205,6 +6338,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6219,6 +6353,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6261,6 +6397,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6283,6 +6420,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6305,6 +6443,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6327,6 +6466,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6984,6 +7124,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7000,6 +7141,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7016,6 +7158,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7032,6 +7175,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7048,6 +7192,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7063,6 +7208,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7077,6 +7223,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7091,6 +7238,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7105,6 +7253,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7119,6 +7268,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7133,6 +7283,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7147,6 +7298,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7161,6 +7313,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7175,6 +7328,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7189,6 +7343,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7209,6 +7364,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7231,6 +7387,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7253,6 +7410,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7275,6 +7433,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7297,6 +7456,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7948,6 +8108,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7963,6 +8124,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7978,6 +8140,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7993,6 +8156,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8008,6 +8172,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8023,6 +8188,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8037,6 +8203,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8051,6 +8218,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8065,6 +8233,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8079,6 +8248,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8093,6 +8263,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8107,6 +8278,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8121,6 +8293,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8135,6 +8308,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8149,6 +8323,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8169,6 +8344,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8191,6 +8367,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8213,6 +8390,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8235,6 +8413,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8257,6 +8436,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8908,6 +9088,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8923,6 +9104,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8938,6 +9120,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8953,6 +9136,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8968,6 +9152,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8983,6 +9168,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8997,6 +9183,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9011,6 +9198,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9025,6 +9213,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9039,6 +9228,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9053,6 +9243,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9067,6 +9258,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9081,6 +9273,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9095,6 +9288,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9109,6 +9303,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9129,6 +9324,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9151,6 +9347,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9173,6 +9370,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9195,6 +9393,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9217,6 +9416,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
index 01317e09028c35..c50b534d864a9c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
@@ -1127,6 +1127,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1139,6 +1140,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1151,6 +1153,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1163,6 +1166,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1175,6 +1179,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,6 +1192,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1199,6 +1205,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1211,6 +1218,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1223,6 +1231,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1235,6 +1244,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1247,6 +1257,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1259,6 +1270,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1271,6 +1283,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1283,6 +1296,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1295,6 +1309,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1307,6 +1322,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1320,6 +1336,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1333,6 +1350,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1346,6 +1364,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1892,6 +1912,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1904,6 +1925,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1916,6 +1938,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1928,6 +1951,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1940,6 +1964,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1952,6 +1977,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1964,6 +1990,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1976,6 +2003,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1988,6 +2016,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2000,6 +2029,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2012,6 +2042,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2024,6 +2055,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2036,6 +2068,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2048,6 +2081,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2060,6 +2094,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2072,6 +2107,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2085,6 +2121,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2098,6 +2135,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2111,6 +2149,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2124,6 +2163,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2667,6 +2707,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2679,6 +2720,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2691,6 +2733,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2703,6 +2746,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2715,6 +2759,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2727,6 +2772,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2739,6 +2785,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2751,6 +2798,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2763,6 +2811,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2775,6 +2824,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2787,6 +2837,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2799,6 +2850,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2811,6 +2863,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2823,6 +2876,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2835,6 +2889,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2848,11 +2903,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2863,11 +2919,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2878,11 +2935,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2893,11 +2951,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2908,11 +2967,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3523,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3537,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3551,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3565,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3579,6 +3643,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3593,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3607,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3621,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3635,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3649,6 +3718,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3663,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3677,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3691,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3705,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3719,6 +3793,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3735,11 +3810,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3754,11 +3830,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3773,11 +3850,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3792,11 +3870,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3811,11 +3890,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4357,6 +4437,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4369,6 +4450,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4381,6 +4463,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4393,6 +4476,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4405,6 +4489,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4417,6 +4502,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4429,6 +4515,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4441,6 +4528,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4453,6 +4541,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4465,6 +4554,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4477,6 +4567,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4489,6 +4580,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4501,6 +4593,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4513,6 +4606,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4525,6 +4619,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4538,11 +4633,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4553,11 +4649,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4568,11 +4665,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4583,11 +4681,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4598,11 +4697,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5142,6 +5242,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5154,6 +5255,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5166,6 +5268,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5178,6 +5281,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5190,6 +5294,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5202,6 +5307,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5214,6 +5320,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5226,6 +5333,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5238,6 +5346,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5250,6 +5359,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5262,6 +5372,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5274,6 +5385,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5286,6 +5398,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5298,6 +5411,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5310,6 +5424,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5323,11 +5438,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5338,11 +5454,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5353,11 +5470,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5368,11 +5486,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5383,11 +5502,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6019,6 +6139,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6035,6 +6156,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6051,6 +6173,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6067,6 +6190,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6083,6 +6207,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6098,6 +6223,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6112,6 +6238,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6126,6 +6253,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6140,6 +6268,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6154,6 +6283,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6168,6 +6298,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6182,6 +6313,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6196,6 +6328,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6210,6 +6343,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6224,6 +6358,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6256,6 +6392,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6273,6 +6410,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6290,6 +6428,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6307,6 +6446,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6944,6 +7084,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6960,6 +7101,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6976,6 +7118,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6992,6 +7135,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7008,6 +7152,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7023,6 +7168,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7037,6 +7183,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7051,6 +7198,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7065,6 +7213,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7079,6 +7228,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7093,6 +7243,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7107,6 +7258,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7121,6 +7273,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7135,6 +7288,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7149,6 +7303,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7164,6 +7319,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7181,6 +7337,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7198,6 +7355,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7215,6 +7373,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7232,6 +7391,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7869,6 +8029,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7885,6 +8046,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7901,6 +8063,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7917,6 +8080,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7933,6 +8097,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7948,6 +8113,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7962,6 +8128,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7976,6 +8143,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7990,6 +8158,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8004,6 +8173,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8018,6 +8188,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8032,6 +8203,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8046,6 +8218,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8060,6 +8233,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8074,6 +8248,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8089,6 +8264,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8106,6 +8282,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8123,6 +8300,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8140,6 +8318,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8157,6 +8336,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8794,6 +8974,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8810,6 +8991,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8826,6 +9008,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8842,6 +9025,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8858,6 +9042,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8873,6 +9058,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8887,6 +9073,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8901,6 +9088,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8915,6 +9103,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8929,6 +9118,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8943,6 +9133,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8957,6 +9148,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8971,6 +9163,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8985,6 +9178,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8999,6 +9193,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9014,6 +9209,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9031,6 +9227,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9048,6 +9245,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9065,6 +9263,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9082,6 +9281,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
index 83e383f335637c..6e647e34927c03 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -632,6 +632,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -644,6 +645,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -656,6 +658,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -668,6 +671,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -680,6 +684,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -692,6 +697,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -704,6 +710,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -716,6 +723,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -728,6 +736,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -740,6 +749,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -752,6 +762,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -764,6 +775,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -776,6 +788,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -788,6 +801,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -800,6 +814,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -812,6 +827,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -825,6 +841,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -838,6 +855,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -851,6 +869,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -864,6 +883,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1237,6 +1257,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1249,6 +1270,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1261,6 +1283,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1273,6 +1296,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1285,6 +1309,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1297,6 +1322,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1309,6 +1335,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1321,6 +1348,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1333,6 +1361,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1345,6 +1374,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1357,6 +1387,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1369,6 +1400,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1381,6 +1413,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1393,6 +1426,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1405,6 +1439,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1417,6 +1452,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1430,6 +1466,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1443,6 +1480,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1456,6 +1494,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1469,6 +1508,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1752,6 +1792,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1764,6 +1805,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1776,6 +1818,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1788,6 +1831,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1800,6 +1844,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1812,6 +1857,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1824,6 +1870,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1836,6 +1883,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1848,6 +1896,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1860,6 +1909,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1872,6 +1922,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1884,6 +1935,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1896,6 +1948,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1908,6 +1961,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1920,6 +1974,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1933,11 +1988,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -1948,11 +2004,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -1963,11 +2020,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -1978,11 +2036,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -1993,11 +2052,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2548,6 +2608,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2562,6 +2623,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2576,6 +2638,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2590,6 +2653,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2604,6 +2668,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2618,6 +2683,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2632,6 +2698,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2646,6 +2713,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2660,6 +2728,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2674,6 +2743,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2688,6 +2758,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2702,6 +2773,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2716,6 +2788,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2730,6 +2803,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2744,6 +2818,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2760,11 +2835,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2779,11 +2855,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2798,11 +2875,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2817,11 +2895,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2836,11 +2915,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3077,6 +3157,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3089,6 +3170,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3101,6 +3183,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3113,6 +3196,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3125,6 +3209,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3137,6 +3222,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3149,6 +3235,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3161,6 +3248,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3173,6 +3261,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3185,6 +3274,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3197,6 +3287,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3209,6 +3300,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3221,6 +3313,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3233,6 +3326,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3245,6 +3339,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3258,11 +3353,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3273,11 +3369,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3288,11 +3385,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3303,11 +3401,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3318,11 +3417,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3602,6 +3702,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3614,6 +3715,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3626,6 +3728,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3638,6 +3741,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3650,6 +3754,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3662,6 +3767,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3674,6 +3780,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3686,6 +3793,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3698,6 +3806,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3710,6 +3819,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3722,6 +3832,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3734,6 +3845,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3746,6 +3858,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3758,6 +3871,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3770,6 +3884,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3783,11 +3898,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3798,11 +3914,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3813,11 +3930,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3828,11 +3946,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3843,11 +3962,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4139,6 +4259,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4155,6 +4276,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4171,6 +4293,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4187,6 +4310,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4203,6 +4327,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4218,6 +4343,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4232,6 +4358,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4246,6 +4373,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4260,6 +4388,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4274,6 +4403,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4288,6 +4418,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4302,6 +4433,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4316,6 +4448,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4330,6 +4463,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4344,6 +4478,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4359,6 +4494,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4376,6 +4512,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4393,6 +4530,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4410,6 +4548,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4427,6 +4566,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4724,6 +4864,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4740,6 +4881,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4756,6 +4898,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4772,6 +4915,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4788,6 +4932,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4803,6 +4948,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4817,6 +4963,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4831,6 +4978,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4845,6 +4993,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4859,6 +5008,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4873,6 +5023,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4887,6 +5038,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4901,6 +5053,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4915,6 +5068,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4929,6 +5083,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4944,6 +5099,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4961,6 +5117,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4978,6 +5135,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4995,6 +5153,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5012,6 +5171,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5309,6 +5469,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5325,6 +5486,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5341,6 +5503,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5357,6 +5520,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5373,6 +5537,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5388,6 +5553,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5402,6 +5568,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5416,6 +5583,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5430,6 +5598,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5444,6 +5613,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5458,6 +5628,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5472,6 +5643,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5486,6 +5658,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5500,6 +5673,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5514,6 +5688,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5529,6 +5704,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5546,6 +5722,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5563,6 +5740,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5580,6 +5758,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5597,6 +5776,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5894,6 +6074,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5910,6 +6091,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5926,6 +6108,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5942,6 +6125,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5958,6 +6142,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5973,6 +6158,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5987,6 +6173,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6001,6 +6188,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6015,6 +6203,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6029,6 +6218,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6043,6 +6233,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6057,6 +6248,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6071,6 +6263,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6085,6 +6278,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6099,6 +6293,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6114,6 +6309,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6131,6 +6327,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6148,6 +6345,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6165,6 +6363,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6182,6 +6381,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
index f9c1a2216dc2c1..4453de1d0e61f8 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
@@ -637,6 +637,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -649,6 +650,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -661,6 +663,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -673,6 +676,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -685,6 +689,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -697,6 +702,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -709,6 +715,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -721,6 +728,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -733,6 +741,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -745,6 +754,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -757,6 +767,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -769,6 +780,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -781,6 +793,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -793,6 +806,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -805,6 +819,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -817,6 +832,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -830,6 +846,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -843,6 +860,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -856,6 +874,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -869,6 +888,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1232,6 +1252,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1244,6 +1265,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1256,6 +1278,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1268,6 +1291,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1280,6 +1304,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1292,6 +1317,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1304,6 +1330,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1316,6 +1343,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1328,6 +1356,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1340,6 +1369,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1352,6 +1382,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1364,6 +1395,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1376,6 +1408,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1388,6 +1421,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1400,6 +1434,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1412,6 +1447,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1425,6 +1461,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1438,6 +1475,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1451,6 +1489,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1464,6 +1503,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1862,6 +1902,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1874,6 +1915,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1886,6 +1928,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1898,6 +1941,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1910,6 +1954,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1922,6 +1967,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1934,6 +1980,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1946,6 +1993,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1958,6 +2006,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1970,6 +2019,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1982,6 +2032,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1994,6 +2045,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2006,6 +2058,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2018,6 +2071,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2030,6 +2084,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2043,11 +2098,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2058,11 +2114,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2073,11 +2130,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2088,11 +2146,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2103,11 +2162,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2648,6 +2708,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2662,6 +2723,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2676,6 +2738,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2690,6 +2753,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2704,6 +2768,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2718,6 +2783,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2732,6 +2798,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2746,6 +2813,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2760,6 +2828,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2774,6 +2843,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2788,6 +2858,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2802,6 +2873,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2816,6 +2888,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2830,6 +2903,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2844,6 +2918,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2860,11 +2935,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2879,11 +2955,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2898,11 +2975,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2917,11 +2995,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2936,11 +3015,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3212,6 +3292,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3224,6 +3305,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3236,6 +3318,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3248,6 +3331,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3260,6 +3344,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3272,6 +3357,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3284,6 +3370,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3296,6 +3383,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3308,6 +3396,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3320,6 +3409,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3332,6 +3422,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3344,6 +3435,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3356,6 +3448,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3368,6 +3461,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3380,6 +3474,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3393,11 +3488,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3408,11 +3504,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3423,11 +3520,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3438,11 +3536,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3453,11 +3552,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3727,6 +3827,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3739,6 +3840,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3751,6 +3853,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3763,6 +3866,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3775,6 +3879,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3787,6 +3892,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3799,6 +3905,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3811,6 +3918,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3823,6 +3931,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3835,6 +3944,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3847,6 +3957,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3859,6 +3970,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3871,6 +3983,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3883,6 +3996,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3895,6 +4009,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3908,11 +4023,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3923,11 +4039,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3938,11 +4055,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3953,11 +4071,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3968,11 +4087,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4534,6 +4654,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4550,6 +4671,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4566,6 +4688,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4582,6 +4705,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4598,6 +4722,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4613,6 +4738,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4627,6 +4753,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4641,6 +4768,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4655,6 +4783,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4669,6 +4798,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4683,6 +4813,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4697,6 +4828,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4711,6 +4843,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4725,6 +4858,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4739,6 +4873,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4754,6 +4889,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4771,6 +4907,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4788,6 +4925,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4805,6 +4943,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4822,6 +4961,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5389,6 +5529,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -5405,6 +5546,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -5421,6 +5563,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -5437,6 +5580,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -5453,6 +5597,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -5468,6 +5613,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5482,6 +5628,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5496,6 +5643,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5510,6 +5658,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5524,6 +5673,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5538,6 +5688,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5552,6 +5703,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5566,6 +5718,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5580,6 +5733,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5594,6 +5748,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5609,6 +5764,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5626,6 +5782,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5643,6 +5800,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5660,6 +5818,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5677,6 +5836,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6244,6 +6404,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6260,6 +6421,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6276,6 +6438,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6292,6 +6455,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6308,6 +6472,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6323,6 +6488,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6337,6 +6503,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6351,6 +6518,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6365,6 +6533,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6379,6 +6548,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6393,6 +6563,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6407,6 +6578,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6421,6 +6593,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6435,6 +6608,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6449,6 +6623,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6464,6 +6639,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6481,6 +6657,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6498,6 +6675,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6515,6 +6693,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6532,6 +6711,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7099,6 +7279,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7115,6 +7296,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7131,6 +7313,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7147,6 +7330,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7163,6 +7347,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7178,6 +7363,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7192,6 +7378,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7206,6 +7393,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7220,6 +7408,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7234,6 +7423,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7248,6 +7438,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7262,6 +7453,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7276,6 +7468,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7290,6 +7483,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7304,6 +7498,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7319,6 +7514,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7336,6 +7532,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7353,6 +7550,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7370,6 +7568,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7387,6 +7586,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
index 1bead6d694c652..85daa79c01210b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
@@ -1127,6 +1127,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1139,6 +1140,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1151,6 +1153,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1163,6 +1166,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1175,6 +1179,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,6 +1192,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1199,6 +1205,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1211,6 +1218,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1223,6 +1231,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1235,6 +1244,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1247,6 +1257,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1259,6 +1270,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1271,6 +1283,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1283,6 +1296,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1295,6 +1309,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1307,6 +1322,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1320,6 +1336,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1333,6 +1350,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1346,6 +1364,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1892,6 +1912,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1904,6 +1925,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1916,6 +1938,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1928,6 +1951,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1940,6 +1964,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1952,6 +1977,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1964,6 +1990,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1976,6 +2003,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1988,6 +2016,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2000,6 +2029,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2012,6 +2042,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2024,6 +2055,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2036,6 +2068,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2048,6 +2081,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2060,6 +2094,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2072,6 +2107,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2085,6 +2121,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2098,6 +2135,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2111,6 +2149,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2124,6 +2163,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2667,6 +2707,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2679,6 +2720,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2691,6 +2733,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2703,6 +2746,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2715,6 +2759,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2727,6 +2772,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2739,6 +2785,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2751,6 +2798,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2763,6 +2811,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2775,6 +2824,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2787,6 +2837,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2799,6 +2850,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2811,6 +2863,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2823,6 +2876,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2835,6 +2889,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2848,11 +2903,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2863,11 +2919,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2878,11 +2935,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2893,11 +2951,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2908,11 +2967,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3523,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3537,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3551,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3565,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3579,6 +3643,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3593,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3607,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3621,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3635,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3649,6 +3718,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3663,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3677,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3691,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3705,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3719,6 +3793,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3735,11 +3810,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3754,11 +3830,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3773,11 +3850,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3792,11 +3870,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3811,11 +3890,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4357,6 +4437,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4369,6 +4450,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4381,6 +4463,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4393,6 +4476,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4405,6 +4489,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4417,6 +4502,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4429,6 +4515,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4441,6 +4528,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4453,6 +4541,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4465,6 +4554,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4477,6 +4567,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4489,6 +4580,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4501,6 +4593,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4513,6 +4606,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4525,6 +4619,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4538,11 +4633,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4553,11 +4649,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4568,11 +4665,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4583,11 +4681,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4598,11 +4697,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5142,6 +5242,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5154,6 +5255,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5166,6 +5268,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5178,6 +5281,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5190,6 +5294,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5202,6 +5307,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5214,6 +5320,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5226,6 +5333,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5238,6 +5346,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5250,6 +5359,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5262,6 +5372,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5274,6 +5385,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5286,6 +5398,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5298,6 +5411,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5310,6 +5424,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5323,11 +5438,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5338,11 +5454,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5353,11 +5470,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5368,11 +5486,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5383,11 +5502,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6019,6 +6139,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6035,6 +6156,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6051,6 +6173,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6067,6 +6190,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6083,6 +6207,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6098,6 +6223,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6112,6 +6238,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6126,6 +6253,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6140,6 +6268,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6154,6 +6283,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6168,6 +6298,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6182,6 +6313,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6196,6 +6328,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6210,6 +6343,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6224,6 +6358,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6256,6 +6392,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6273,6 +6410,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6290,6 +6428,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6307,6 +6446,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6944,6 +7084,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6960,6 +7101,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6976,6 +7118,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6992,6 +7135,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7008,6 +7152,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7023,6 +7168,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7037,6 +7183,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7051,6 +7198,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7065,6 +7213,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7079,6 +7228,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7093,6 +7243,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7107,6 +7258,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7121,6 +7273,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7135,6 +7288,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7149,6 +7303,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7164,6 +7319,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7181,6 +7337,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7198,6 +7355,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7215,6 +7373,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7232,6 +7391,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7869,6 +8029,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7885,6 +8046,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7901,6 +8063,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7917,6 +8080,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7933,6 +8097,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7948,6 +8113,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7962,6 +8128,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7976,6 +8143,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7990,6 +8158,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8004,6 +8173,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8018,6 +8188,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8032,6 +8203,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8046,6 +8218,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8060,6 +8233,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8074,6 +8248,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8089,6 +8264,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8106,6 +8282,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8123,6 +8300,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8140,6 +8318,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8157,6 +8336,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8794,6 +8974,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8810,6 +8991,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8826,6 +9008,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8842,6 +9025,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8858,6 +9042,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8873,6 +9058,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8887,6 +9073,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8901,6 +9088,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8915,6 +9103,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8929,6 +9118,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8943,6 +9133,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8957,6 +9148,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8971,6 +9163,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8985,6 +9178,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8999,6 +9193,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9014,6 +9209,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9031,6 +9227,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9048,6 +9245,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9065,6 +9263,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9082,6 +9281,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
index 51d9766f6a8f92..9780b48ce4b671 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
@@ -1127,6 +1127,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1139,6 +1140,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1151,6 +1153,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1163,6 +1166,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1175,6 +1179,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,6 +1192,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1199,6 +1205,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1211,6 +1218,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1223,6 +1231,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1235,6 +1244,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1247,6 +1257,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1259,6 +1270,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1271,6 +1283,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1283,6 +1296,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1295,6 +1309,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1307,6 +1322,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1320,6 +1336,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1333,6 +1350,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1346,6 +1364,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1892,6 +1912,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1904,6 +1925,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1916,6 +1938,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1928,6 +1951,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1940,6 +1964,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1952,6 +1977,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1964,6 +1990,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1976,6 +2003,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1988,6 +2016,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2000,6 +2029,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2012,6 +2042,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2024,6 +2055,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2036,6 +2068,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2048,6 +2081,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2060,6 +2094,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2072,6 +2107,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2085,6 +2121,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2098,6 +2135,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2111,6 +2149,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2124,6 +2163,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2667,6 +2707,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2679,6 +2720,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2691,6 +2733,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2703,6 +2746,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2715,6 +2759,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2727,6 +2772,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2739,6 +2785,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2751,6 +2798,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2763,6 +2811,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2775,6 +2824,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2787,6 +2837,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2799,6 +2850,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2811,6 +2863,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2823,6 +2876,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2835,6 +2889,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2848,11 +2903,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2863,11 +2919,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2878,11 +2935,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2893,11 +2951,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2908,11 +2967,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3523,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3537,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3551,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3565,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3579,6 +3643,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3593,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3607,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3621,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3635,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3649,6 +3718,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3663,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3677,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3691,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3705,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3719,6 +3793,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3735,11 +3810,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3754,11 +3830,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3773,11 +3850,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3792,11 +3870,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3811,11 +3890,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4357,6 +4437,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4369,6 +4450,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4381,6 +4463,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4393,6 +4476,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4405,6 +4489,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4417,6 +4502,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4429,6 +4515,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4441,6 +4528,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4453,6 +4541,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4465,6 +4554,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4477,6 +4567,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4489,6 +4580,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4501,6 +4593,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4513,6 +4606,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4525,6 +4619,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4538,11 +4633,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4553,11 +4649,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4568,11 +4665,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4583,11 +4681,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4598,11 +4697,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5142,6 +5242,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5154,6 +5255,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5166,6 +5268,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5178,6 +5281,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5190,6 +5294,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5202,6 +5307,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5214,6 +5320,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5226,6 +5333,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5238,6 +5346,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5250,6 +5359,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5262,6 +5372,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5274,6 +5385,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5286,6 +5398,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5298,6 +5411,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5310,6 +5424,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5323,11 +5438,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5338,11 +5454,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5353,11 +5470,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5368,11 +5486,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5383,11 +5502,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6019,6 +6139,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6035,6 +6156,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6051,6 +6173,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6067,6 +6190,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6083,6 +6207,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6098,6 +6223,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6112,6 +6238,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6126,6 +6253,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6140,6 +6268,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6154,6 +6283,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6168,6 +6298,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6182,6 +6313,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6196,6 +6328,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6210,6 +6343,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6224,6 +6358,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6256,6 +6392,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6273,6 +6410,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6290,6 +6428,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6307,6 +6446,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6944,6 +7084,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6960,6 +7101,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6976,6 +7118,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6992,6 +7135,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7008,6 +7152,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7023,6 +7168,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7037,6 +7183,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7051,6 +7198,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7065,6 +7213,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7079,6 +7228,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7093,6 +7243,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7107,6 +7258,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7121,6 +7273,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7135,6 +7288,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7149,6 +7303,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7164,6 +7319,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7181,6 +7337,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7198,6 +7355,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7215,6 +7373,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7232,6 +7391,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7869,6 +8029,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7885,6 +8046,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7901,6 +8063,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7917,6 +8080,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7933,6 +8097,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7948,6 +8113,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7962,6 +8128,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7976,6 +8143,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7990,6 +8158,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8004,6 +8173,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8018,6 +8188,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8032,6 +8203,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8046,6 +8218,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8060,6 +8233,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8074,6 +8248,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8089,6 +8264,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8106,6 +8282,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8123,6 +8300,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8140,6 +8318,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8157,6 +8336,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8794,6 +8974,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8810,6 +8991,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8826,6 +9008,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8842,6 +9025,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8858,6 +9042,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8873,6 +9058,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8887,6 +9073,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8901,6 +9088,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8915,6 +9103,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8929,6 +9118,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8943,6 +9133,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8957,6 +9148,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8971,6 +9163,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8985,6 +9178,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8999,6 +9193,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9014,6 +9209,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9031,6 +9227,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9048,6 +9245,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9065,6 +9263,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9082,6 +9281,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
index 0c3ed9b0f1de0f..0bb582fd33216c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -657,6 +657,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -669,6 +670,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -681,6 +683,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -693,6 +696,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -705,6 +709,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -717,6 +722,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -729,6 +735,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -741,6 +748,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -753,6 +761,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -765,6 +774,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -777,6 +787,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -789,6 +800,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -801,6 +813,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -813,6 +826,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -825,6 +839,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -837,6 +852,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -850,6 +866,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -863,6 +880,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -876,6 +894,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -889,6 +908,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1262,6 +1282,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1274,6 +1295,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1286,6 +1308,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1298,6 +1321,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1310,6 +1334,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1322,6 +1347,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1334,6 +1360,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1346,6 +1373,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1358,6 +1386,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1370,6 +1399,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1382,6 +1412,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1394,6 +1425,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1406,6 +1438,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1418,6 +1451,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1430,6 +1464,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1442,6 +1477,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1455,6 +1491,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1468,6 +1505,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1481,6 +1519,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1494,6 +1533,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1802,6 +1842,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1814,6 +1855,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1826,6 +1868,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1838,6 +1881,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1850,6 +1894,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1862,6 +1907,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1874,6 +1920,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1886,6 +1933,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1898,6 +1946,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1910,6 +1959,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1922,6 +1972,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1934,6 +1985,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1946,6 +1998,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1958,6 +2011,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1970,6 +2024,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1983,11 +2038,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -1998,11 +2054,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2013,11 +2070,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2028,11 +2086,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2043,11 +2102,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2598,6 +2658,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2612,6 +2673,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2626,6 +2688,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2640,6 +2703,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2654,6 +2718,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2668,6 +2733,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2682,6 +2748,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2696,6 +2763,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2710,6 +2778,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2724,6 +2793,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2738,6 +2808,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2752,6 +2823,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2766,6 +2838,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2780,6 +2853,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2794,6 +2868,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2810,11 +2885,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2829,11 +2905,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2848,11 +2925,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2867,11 +2945,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2886,11 +2965,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3172,6 +3252,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3184,6 +3265,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3196,6 +3278,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3208,6 +3291,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3220,6 +3304,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3232,6 +3317,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3244,6 +3330,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3256,6 +3343,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3268,6 +3356,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3280,6 +3369,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3292,6 +3382,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3304,6 +3395,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3316,6 +3408,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3328,6 +3421,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3340,6 +3434,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3353,11 +3448,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3368,11 +3464,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3383,11 +3480,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3398,11 +3496,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3413,11 +3512,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3697,6 +3797,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3709,6 +3810,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3721,6 +3823,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3733,6 +3836,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3745,6 +3849,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3757,6 +3862,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3769,6 +3875,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3781,6 +3888,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3793,6 +3901,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3805,6 +3914,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3817,6 +3927,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3829,6 +3940,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3841,6 +3953,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3853,6 +3966,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3865,6 +3979,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3878,11 +3993,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3893,11 +4009,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3908,11 +4025,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3923,11 +4041,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3938,11 +4057,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4234,6 +4354,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4250,6 +4371,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4266,6 +4388,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4282,6 +4405,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4298,6 +4422,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4313,6 +4438,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4327,6 +4453,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4341,6 +4468,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4355,6 +4483,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4369,6 +4498,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4383,6 +4513,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4397,6 +4528,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4411,6 +4543,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4425,6 +4558,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4439,6 +4573,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4454,6 +4589,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4471,6 +4607,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4488,6 +4625,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4505,6 +4643,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4522,6 +4661,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4819,6 +4959,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4835,6 +4976,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4851,6 +4993,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4867,6 +5010,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4883,6 +5027,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4898,6 +5043,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4912,6 +5058,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4926,6 +5073,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4940,6 +5088,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4954,6 +5103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4968,6 +5118,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4982,6 +5133,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4996,6 +5148,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5010,6 +5163,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5024,6 +5178,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5039,6 +5194,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5056,6 +5212,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5073,6 +5230,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5090,6 +5248,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5107,6 +5266,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5404,6 +5564,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5420,6 +5581,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5436,6 +5598,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5452,6 +5615,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5468,6 +5632,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5483,6 +5648,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5497,6 +5663,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5511,6 +5678,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5525,6 +5693,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5539,6 +5708,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5553,6 +5723,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5567,6 +5738,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5581,6 +5753,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5595,6 +5768,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5609,6 +5783,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5624,6 +5799,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5641,6 +5817,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5658,6 +5835,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5675,6 +5853,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5692,6 +5871,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5989,6 +6169,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6005,6 +6186,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6021,6 +6203,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6037,6 +6220,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6053,6 +6237,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6068,6 +6253,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6082,6 +6268,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6096,6 +6283,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6110,6 +6298,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6124,6 +6313,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6138,6 +6328,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6152,6 +6343,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6166,6 +6358,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6180,6 +6373,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6194,6 +6388,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6209,6 +6404,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6226,6 +6422,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6243,6 +6440,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6260,6 +6458,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6277,6 +6476,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
index a58e5a987bb4c9..171ede54699795 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
@@ -1127,6 +1127,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1139,6 +1140,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1151,6 +1153,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1163,6 +1166,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1175,6 +1179,7 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,6 +1192,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1199,6 +1205,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1211,6 +1218,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1223,6 +1231,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1235,6 +1244,7 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1247,6 +1257,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1259,6 +1270,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1271,6 +1283,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1283,6 +1296,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1295,6 +1309,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1307,6 +1322,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1320,6 +1336,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1333,6 +1350,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1346,6 +1364,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1359,6 +1378,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1892,6 +1912,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1904,6 +1925,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1916,6 +1938,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1928,6 +1951,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1940,6 +1964,7 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1952,6 +1977,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1964,6 +1990,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1976,6 +2003,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1988,6 +2016,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2000,6 +2029,7 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2012,6 +2042,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2024,6 +2055,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2036,6 +2068,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2048,6 +2081,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2060,6 +2094,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2072,6 +2107,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2085,6 +2121,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2098,6 +2135,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2111,6 +2149,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2124,6 +2163,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2667,6 +2707,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2679,6 +2720,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2691,6 +2733,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2703,6 +2746,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2715,6 +2759,7 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2727,6 +2772,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2739,6 +2785,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2751,6 +2798,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2763,6 +2811,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2775,6 +2824,7 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2787,6 +2837,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2799,6 +2850,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2811,6 +2863,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2823,6 +2876,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2835,6 +2889,7 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2848,11 +2903,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2863,11 +2919,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2878,11 +2935,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2893,11 +2951,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2908,11 +2967,12 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3523,6 +3583,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3537,6 +3598,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3551,6 +3613,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3565,6 +3628,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3579,6 +3643,7 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3593,6 +3658,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3607,6 +3673,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3621,6 +3688,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3635,6 +3703,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3649,6 +3718,7 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3663,6 +3733,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3677,6 +3748,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3691,6 +3763,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3705,6 +3778,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3719,6 +3793,7 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3735,11 +3810,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3754,11 +3830,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3773,11 +3850,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3792,11 +3870,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3811,11 +3890,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x1, x19
-; -O1: and x9, x0, x21
+; -O1: and x8, x0, x21
+; -O1: and x9, x1, x19
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4357,6 +4437,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4369,6 +4450,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4381,6 +4463,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4393,6 +4476,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4405,6 +4489,7 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4417,6 +4502,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4429,6 +4515,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4441,6 +4528,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4453,6 +4541,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4465,6 +4554,7 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4477,6 +4567,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4489,6 +4580,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4501,6 +4593,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4513,6 +4606,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4525,6 +4619,7 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4538,11 +4633,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4553,11 +4649,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4568,11 +4665,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4583,11 +4681,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4598,11 +4697,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x21
+; -O1: orr x8, x0, x21
+; -O1: orr x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5142,6 +5242,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5154,6 +5255,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5166,6 +5268,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5178,6 +5281,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5190,6 +5294,7 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5202,6 +5307,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5214,6 +5320,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5226,6 +5333,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5238,6 +5346,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5250,6 +5359,7 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5262,6 +5372,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5274,6 +5385,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5286,6 +5398,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5298,6 +5411,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5310,6 +5424,7 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5323,11 +5438,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5338,11 +5454,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5353,11 +5470,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5368,11 +5486,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5383,11 +5502,12 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x21
+; -O1: eor x8, x0, x21
+; -O1: eor x9, x1, x19
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6019,6 +6139,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6035,6 +6156,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6051,6 +6173,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6067,6 +6190,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6083,6 +6207,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6098,6 +6223,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6112,6 +6238,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6126,6 +6253,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6140,6 +6268,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6154,6 +6283,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6168,6 +6298,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6182,6 +6313,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6196,6 +6328,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6210,6 +6343,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6224,6 +6358,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6239,6 +6374,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6256,6 +6392,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6273,6 +6410,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6290,6 +6428,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6307,6 +6446,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6944,6 +7084,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6960,6 +7101,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6976,6 +7118,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6992,6 +7135,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7008,6 +7152,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7023,6 +7168,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7037,6 +7183,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7051,6 +7198,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7065,6 +7213,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7079,6 +7228,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7093,6 +7243,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7107,6 +7258,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7121,6 +7273,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7135,6 +7288,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7149,6 +7303,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7164,6 +7319,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7181,6 +7337,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7198,6 +7355,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7215,6 +7373,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7232,6 +7391,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7869,6 +8029,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7885,6 +8046,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7901,6 +8063,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7917,6 +8080,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7933,6 +8097,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7948,6 +8113,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7962,6 +8128,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7976,6 +8143,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7990,6 +8158,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8004,6 +8173,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8018,6 +8188,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8032,6 +8203,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8046,6 +8218,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8060,6 +8233,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8074,6 +8248,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8089,6 +8264,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8106,6 +8282,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8123,6 +8300,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8140,6 +8318,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8157,6 +8336,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8794,6 +8974,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8810,6 +8991,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8826,6 +9008,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8842,6 +9025,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8858,6 +9042,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8873,6 +9058,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8887,6 +9073,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8901,6 +9088,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8915,6 +9103,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8929,6 +9118,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8943,6 +9133,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8957,6 +9148,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8971,6 +9163,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8985,6 +9178,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8999,6 +9193,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9014,6 +9209,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9031,6 +9227,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9048,6 +9245,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9065,6 +9263,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9082,6 +9281,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
+; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 600c35b6862f6f..c7ab16757a0e74 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -5956,900 +5956,198 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x9
-; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX7LESS-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
-; GFX7LESS-NEXT: v_or_b32_e32 v4, v0, v4
-; GFX7LESS-NEXT: v_mul_f64 v[41:42], v[1:2], 4.0
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v4, v3
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2
-; GFX7LESS-NEXT: .LBB9_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX7LESS-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT: s_movk_i32 s32, 0x800
-; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB9_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-NEXT: v_mov_b32_e32 v3, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[0:1], v[3:4], v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB9_2
-; GFX9-NEXT: .LBB9_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX9-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b64 s[8:9], exec
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, s9, v3
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1064-NEXT: .LBB9_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1064-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s8, exec_lo
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1032-NEXT: .LBB9_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1032-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b64 s[8:9], exec
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_mov_b32 s32, 32
+; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0
+; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1164-NEXT: .LBB9_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1164-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b32 s6, exec_lo
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_mov_b32 s32, 32
+; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_bcnt1_i32_b32 s0, s6
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1132-NEXT: .LBB9_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1132-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x9
-; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX7LESS-DPP-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v4, v0, v4
-; GFX7LESS-DPP-NEXT: v_mul_f64 v[41:42], v[1:2], 4.0
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v4, v3
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX7LESS-DPP-NEXT: .LBB9_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX7LESS-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[0:1], v[3:4], v[41:42]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX9-DPP-NEXT: .LBB9_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX9-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b64 s[8:9], exec
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s9, v3
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1064-DPP-NEXT: .LBB9_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1064-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s8, exec_lo
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1032-DPP-NEXT: .LBB9_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1032-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b64 s[8:9], exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
+; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0
+; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1164-DPP-NEXT: .LBB9_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1164-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b32 s6, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, s6
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1132-DPP-NEXT: .LBB9_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1132-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") monotonic, align 4
ret void
@@ -6858,42 +6156,35 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-NEXT: v_mov_b32_e32 v41, 0
-; GFX7LESS-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
+; GFX7LESS-NEXT: v_bfrev_b32_e32 v3, 1
; GFX7LESS-NEXT: .LBB10_1: ; %ComputeLoop
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1]
@@ -6903,7 +6194,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[4:5]
-; GFX7LESS-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX7LESS-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX7LESS-NEXT: s_cbranch_vccnz .LBB10_1
; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -6913,99 +6204,50 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4
-; GFX7LESS-NEXT: .LBB10_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB10_4
+; GFX7LESS-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v41, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
; GFX9-NEXT: .LBB10_1: ; %ComputeLoop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX9-NEXT: v_readlane_b32 s3, v1, s4
; GFX9-NEXT: v_readlane_b32 s2, v0, s4
-; GFX9-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -7018,96 +6260,50 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB10_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB10_4
-; GFX9-NEXT: .LBB10_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB10_4
+; GFX9-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v41, 0
-; GFX1064-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
-; GFX1064-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1064-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -7120,90 +6316,42 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB10_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1064-NEXT: .LBB10_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1064-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: v_mov_b32_e32 v41, 0
-; GFX1032-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -7212,90 +6360,42 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: v_readlane_b32 s2, v0, s1
; GFX1032-NEXT: s_lshl_b32 s1, 1, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
-; GFX1032-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1032-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1032-NEXT: s_cmp_lg_u32 s0, 0
; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB10_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1032-NEXT: .LBB10_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1032-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s6
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: v_mov_b32_e32 v41, 0
-; GFX1164-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1164-NEXT: v_mov_b32_e32 v2, 0
+; GFX1164-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -7304,7 +6404,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: v_readlane_b32 s3, v1, s4
; GFX1164-NEXT: v_readlane_b32 s2, v0, s4
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1164-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -7319,79 +6419,32 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB10_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1164-NEXT: .LBB10_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1164-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: v_mov_b32_e32 v41, 0
-; GFX1132-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1132-NEXT: v_mov_b32_e32 v2, 0
+; GFX1132-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -7402,780 +6455,239 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_lshl_b32 s1, 1, s1
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1
-; GFX1132-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1132-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1132-NEXT: s_cmp_lg_u32 s0, 0
; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB10_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1132-NEXT: .LBB10_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1132-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB10_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[42:43]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX9-DPP-NEXT: .LBB10_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX9-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], s[2:3], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1064-DPP-NEXT: .LBB10_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1064-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1032-DPP-NEXT: .LBB10_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1032-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1164-DPP-NEXT: .LBB10_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1164-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1132-DPP-NEXT: .LBB10_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1132-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.float.value()
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %divValue syncscope("agent") monotonic, align 4
@@ -12044,932 +10556,252 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp(ptr addrspace(1) %ptr) #2 {
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
-; GFX7LESS-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX7LESS-NEXT: s_mov_b32 s1, 0x43300000
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v5, exec_hi, v5
-; GFX7LESS-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX7LESS-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2
-; GFX7LESS-NEXT: .LBB16_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX7LESS-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX9-NEXT: s_mov_b32 s1, 0x43300000
-; GFX9-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
-; GFX9-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX9-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s14, -1
+; GFX9-NEXT: s_mov_b32 s15, 0xe00000
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: s_add_u32 s12, s12, s9
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: s_addc_u32 s13, s13, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB16_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB16_2
-; GFX9-NEXT: .LBB16_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX9-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1064-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1064-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s14, -1
+; GFX1064-NEXT: s_mov_b32 s15, 0x31e16000
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: s_add_u32 s12, s12, s9
+; GFX1064-NEXT: s_addc_u32 s13, s13, 0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1064-NEXT: .LBB16_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1064-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1032-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
-; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s14, -1
+; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1032-NEXT: s_add_u32 s12, s12, s9
+; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1032-NEXT: .LBB16_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1032-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
+; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1164-NEXT: scratch_load_b64 v[0:1], off, off offset:16
+; GFX1164-NEXT: v_mov_b32_e32 v1, 0x43300000
+; GFX1164-NEXT: v_mov_b32_e32 v2, s0
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1164-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
-; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164-NEXT: s_clause 0x1
+; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1164-NEXT: scratch_store_b32 off, v2, off
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1164-NEXT: .LBB16_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1164-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
; GFX1132-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1132-NEXT: scratch_load_b64 v[0:1], off, off offset:16
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0x43300000 :: v_dual_mov_b32 v2, s0
+; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_clause 0x1
+; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1132-NEXT: scratch_store_b32 off, v2, off
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1132-NEXT: .LBB16_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1132-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
-; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX7LESS-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v5, exec_hi, v5
-; GFX7LESS-DPP-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX7LESS-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX7LESS-DPP-NEXT: .LBB16_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX7LESS-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX9-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX9-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX9-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s14, -1
+; GFX9-DPP-NEXT: s_mov_b32 s15, 0xe00000
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX9-DPP-NEXT: .LBB16_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX9-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1064-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s14, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s15, 0x31e16000
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1064-DPP-NEXT: .LBB16_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1064-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
-; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1032-DPP-NEXT: .LBB16_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1032-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
+; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1164-DPP-NEXT: scratch_load_b64 v[0:1], off, off offset:16
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0x43300000
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
-; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164-DPP-NEXT: s_clause 0x1
+; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1164-DPP-NEXT: scratch_store_b32 off, v2, off
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1164-DPP-NEXT: .LBB16_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1164-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1132-DPP-NEXT: scratch_load_b64 v[0:1], off, off offset:16
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0x43300000 :: v_dual_mov_b32 v2, s0
+; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_clause 0x1
+; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1132-DPP-NEXT: scratch_store_b32 off, v2, off
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1132-DPP-NEXT: .LBB16_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1132-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 monotonic, align 4
ret void
@@ -12978,42 +10810,35 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp(ptr addrspace(1) %ptr) #2 {
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-NEXT: v_mov_b32_e32 v41, 0
-; GFX7LESS-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
+; GFX7LESS-NEXT: v_bfrev_b32_e32 v3, 1
; GFX7LESS-NEXT: .LBB17_1: ; %ComputeLoop
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1]
@@ -13023,7 +10848,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[4:5]
-; GFX7LESS-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX7LESS-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX7LESS-NEXT: s_cbranch_vccnz .LBB17_1
; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -13033,99 +10858,50 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4
-; GFX7LESS-NEXT: .LBB17_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB17_4
+; GFX7LESS-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v41, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
; GFX9-NEXT: .LBB17_1: ; %ComputeLoop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX9-NEXT: v_readlane_b32 s3, v1, s4
; GFX9-NEXT: v_readlane_b32 s2, v0, s4
-; GFX9-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -13138,96 +10914,50 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB17_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB17_4
-; GFX9-NEXT: .LBB17_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB17_4
+; GFX9-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v41, 0
-; GFX1064-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
-; GFX1064-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1064-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -13240,90 +10970,42 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB17_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1064-NEXT: .LBB17_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1064-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: v_mov_b32_e32 v41, 0
-; GFX1032-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -13332,90 +11014,42 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: v_readlane_b32 s2, v0, s1
; GFX1032-NEXT: s_lshl_b32 s1, 1, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
-; GFX1032-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1032-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1032-NEXT: s_cmp_lg_u32 s0, 0
; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB17_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1032-NEXT: .LBB17_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1032-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: v_mov_b32_e32 v41, 0
-; GFX1164-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1164-NEXT: v_mov_b32_e32 v2, 0
+; GFX1164-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -13424,7 +11058,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: v_readlane_b32 s3, v1, s4
; GFX1164-NEXT: v_readlane_b32 s2, v0, s4
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1164-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13439,79 +11073,32 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB17_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1164-NEXT: .LBB17_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1164-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: v_mov_b32_e32 v41, 0
-; GFX1132-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1132-NEXT: v_mov_b32_e32 v2, 0
+; GFX1132-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -13522,780 +11109,239 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: s_lshl_b32 s1, 1, s1
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1
-; GFX1132-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1132-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1132-NEXT: s_cmp_lg_u32 s0, 0
; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB17_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1132-NEXT: .LBB17_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1132-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB17_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[42:43]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX9-DPP-NEXT: .LBB17_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX9-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], s[2:3], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1064-DPP-NEXT: .LBB17_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1064-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1032-DPP-NEXT: .LBB17_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1032-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1164-DPP-NEXT: .LBB17_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1164-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1132-DPP-NEXT: .LBB17_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1132-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %divValue monotonic, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 954d810d7aa4e3..0605cac487d658 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -3662,862 +3662,198 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2
-; GFX7LESS-NEXT: .LBB6_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX7LESS-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB6_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB6_2
-; GFX9-NEXT: .LBB6_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX9-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1064-NEXT: .LBB6_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1064-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1032-NEXT: .LBB6_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1032-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1164-NEXT: .LBB6_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1164-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1132-NEXT: .LBB6_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1132-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX7LESS-DPP-NEXT: .LBB6_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX7LESS-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX9-DPP-NEXT: .LBB6_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX9-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1064-DPP-NEXT: .LBB6_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1064-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1032-DPP-NEXT: .LBB6_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1032-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1164-DPP-NEXT: .LBB6_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1164-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1132-DPP-NEXT: .LBB6_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1132-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 syncscope("agent") monotonic, align 4
ret void
@@ -4526,39 +3862,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -4583,92 +3912,41 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42]
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4
-; GFX7LESS-NEXT: .LBB7_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB7_4
+; GFX7LESS-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -4692,103 +3970,55 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB7_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB7_4
-; GFX9-NEXT: .LBB7_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB7_4
+; GFX9-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v3, 0
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB7_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
-; GFX1064-NEXT: v_max_f64 v[2:3], v[3:4], v[3:4]
+; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
-; GFX1064-NEXT: v_max_f64 v[3:4], v[2:3], v[4:5]
+; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1064-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -4798,90 +4028,40 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB7_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1064-NEXT: .LBB7_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1064-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -4899,85 +4079,35 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB7_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1032-NEXT: .LBB7_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1032-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
; GFX1164-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
@@ -5005,79 +4135,30 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB7_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1164-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1164-NEXT: .LBB7_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1164-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
; GFX1132-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -5098,830 +4179,234 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB7_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_mov_b32_e32 v31, v40
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s43
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: v_mov_b32_e32 v2, s42
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1132-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
-; GFX1132-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0
-; GFX1132-NEXT: v_mov_b32_e32 v5, 8
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1132-NEXT: .LBB7_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1132-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB7_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[42:43], s[42:43]
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX9-DPP-NEXT: .LBB7_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX9-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], s[4:5], s[4:5]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], s[2:3], s[2:3]
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1064-DPP-NEXT: .LBB7_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1064-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1032-DPP-NEXT: .LBB7_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1032-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1164-DPP-NEXT: .LBB7_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1164-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1132-DPP-NEXT: .LBB7_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1132-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.double.value()
%result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue syncscope("agent") monotonic, align 4
@@ -7345,862 +5830,198 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2
-; GFX7LESS-NEXT: .LBB10_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX7LESS-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB10_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB10_2
-; GFX9-NEXT: .LBB10_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX9-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1064-NEXT: .LBB10_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1064-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1032-NEXT: .LBB10_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1032-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1164-NEXT: .LBB10_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1164-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1132-NEXT: .LBB10_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1132-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX7LESS-DPP-NEXT: .LBB10_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX7LESS-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX9-DPP-NEXT: .LBB10_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX9-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1064-DPP-NEXT: .LBB10_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1064-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1032-DPP-NEXT: .LBB10_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1032-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1164-DPP-NEXT: .LBB10_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1164-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1132-DPP-NEXT: .LBB10_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1132-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fmax ptr addrspace(1) %ptr, double 4.0 monotonic, align 4
ret void
@@ -8209,39 +6030,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -8266,92 +6080,41 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42]
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4
-; GFX7LESS-NEXT: .LBB11_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB11_4
+; GFX7LESS-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -8375,103 +6138,55 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB11_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB11_4
-; GFX9-NEXT: .LBB11_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB11_4
+; GFX9-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v3, 0
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB11_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
-; GFX1064-NEXT: v_max_f64 v[2:3], v[3:4], v[3:4]
+; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
-; GFX1064-NEXT: v_max_f64 v[3:4], v[2:3], v[4:5]
+; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1064-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -8481,90 +6196,40 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB11_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1064-NEXT: .LBB11_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1064-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -8582,85 +6247,35 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB11_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1032-NEXT: .LBB11_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1032-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
; GFX1164-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
@@ -8688,79 +6303,30 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB11_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1164-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1164-NEXT: .LBB11_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1164-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
; GFX1132-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -8781,830 +6347,234 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB11_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_mov_b32_e32 v31, v40
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s43
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: v_mov_b32_e32 v2, s42
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1132-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
-; GFX1132-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0
-; GFX1132-NEXT: v_mov_b32_e32 v5, 8
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1132-NEXT: .LBB11_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1132-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB11_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[42:43], s[42:43]
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX9-DPP-NEXT: .LBB11_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX9-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], s[4:5], s[4:5]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], s[2:3], s[2:3]
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1064-DPP-NEXT: .LBB11_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1064-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1032-DPP-NEXT: .LBB11_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1032-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1164-DPP-NEXT: .LBB11_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1164-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1132-DPP-NEXT: .LBB11_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1132-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.double.value()
%result = atomicrmw fmax ptr addrspace(1) %ptr, double %divValue monotonic, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 4ab86835f90d14..20dd93a9a1c489 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -3662,862 +3662,198 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2
-; GFX7LESS-NEXT: .LBB6_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX7LESS-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB6_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB6_2
-; GFX9-NEXT: .LBB6_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX9-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1064-NEXT: .LBB6_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1064-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1032-NEXT: .LBB6_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1032-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1164-NEXT: .LBB6_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1164-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1132-NEXT: .LBB6_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1132-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX7LESS-DPP-NEXT: .LBB6_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX7LESS-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX9-DPP-NEXT: .LBB6_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX9-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1064-DPP-NEXT: .LBB6_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1064-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1032-DPP-NEXT: .LBB6_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1032-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1164-DPP-NEXT: .LBB6_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1164-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
-; GFX1132-DPP-NEXT: .LBB6_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB6_2
+; GFX1132-DPP-NEXT: .LBB6_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fmin ptr addrspace(1) %ptr, double 4.0 syncscope("agent") monotonic, align 4
ret void
@@ -4526,39 +3862,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -4583,92 +3912,41 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42]
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4
-; GFX7LESS-NEXT: .LBB7_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB7_4
+; GFX7LESS-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -4692,103 +3970,55 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB7_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB7_4
-; GFX9-NEXT: .LBB7_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB7_4
+; GFX9-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v3, 0
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB7_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
-; GFX1064-NEXT: v_max_f64 v[2:3], v[3:4], v[3:4]
+; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
-; GFX1064-NEXT: v_min_f64 v[3:4], v[2:3], v[4:5]
+; GFX1064-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1064-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -4798,90 +4028,40 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB7_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1064-NEXT: .LBB7_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1064-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -4899,85 +4079,35 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB7_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1032-NEXT: .LBB7_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1032-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
; GFX1164-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
@@ -5005,79 +4135,30 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB7_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1164-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1164-NEXT: .LBB7_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1164-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
; GFX1132-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -5098,830 +4179,234 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB7_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_mov_b32_e32 v31, v40
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s43
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: v_mov_b32_e32 v2, s42
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1132-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
-; GFX1132-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0
-; GFX1132-NEXT: v_mov_b32_e32 v5, 8
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB7_4
-; GFX1132-NEXT: .LBB7_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB7_4
+; GFX1132-NEXT: .LBB7_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB7_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[42:43], s[42:43]
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX9-DPP-NEXT: .LBB7_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX9-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1064-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], s[4:5], s[4:5]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], s[2:3], s[2:3]
-; GFX1064-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1064-DPP-NEXT: .LBB7_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1064-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1032-DPP-NEXT: .LBB7_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1032-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1164-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1164-DPP-NEXT: .LBB7_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1164-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2
-; GFX1132-DPP-NEXT: .LBB7_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB7_2
+; GFX1132-DPP-NEXT: .LBB7_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.double.value()
%result = atomicrmw fmin ptr addrspace(1) %ptr, double %divValue syncscope("agent") monotonic, align 4
@@ -7345,862 +5830,198 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2
-; GFX7LESS-NEXT: .LBB10_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX7LESS-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB10_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB10_2
-; GFX9-NEXT: .LBB10_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX9-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1064-NEXT: .LBB10_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1064-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1032-NEXT: .LBB10_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1032-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1164-NEXT: .LBB10_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1164-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1132-NEXT: .LBB10_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1132-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX7LESS-DPP-NEXT: .LBB10_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX7LESS-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX9-DPP-NEXT: .LBB10_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX9-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1064-DPP-NEXT: .LBB10_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1064-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1032-DPP-NEXT: .LBB10_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1032-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1164-DPP-NEXT: .LBB10_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1164-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1132-DPP-NEXT: .LBB10_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1132-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fmin ptr addrspace(1) %ptr, double 4.0 monotonic, align 4
ret void
@@ -8209,39 +6030,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -8266,92 +6080,41 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42]
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4
-; GFX7LESS-NEXT: .LBB11_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB11_4
+; GFX7LESS-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
@@ -8375,103 +6138,55 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB11_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB11_4
-; GFX9-NEXT: .LBB11_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB11_4
+; GFX9-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v3, 0
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0x7ff80000
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB11_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
-; GFX1064-NEXT: v_max_f64 v[2:3], v[3:4], v[3:4]
+; GFX1064-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
; GFX1064-NEXT: v_max_f64 v[4:5], s[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
-; GFX1064-NEXT: v_min_f64 v[3:4], v[2:3], v[4:5]
+; GFX1064-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1064-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -8481,90 +6196,40 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB11_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1064-NEXT: .LBB11_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1064-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -8582,85 +6247,35 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB11_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1032-NEXT: .LBB11_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1032-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
; GFX1164-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
@@ -8688,79 +6303,30 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB11_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1164-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1164-NEXT: .LBB11_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1164-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
; GFX1132-NEXT: v_mov_b32_e32 v3, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -8781,830 +6347,234 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB11_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_max_f64 v[0:1], v[4:5], v[4:5]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_mov_b32_e32 v31, v40
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s43
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: v_mov_b32_e32 v2, s42
-; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
-; GFX1132-NEXT: scratch_store_b64 off, v[0:1], off offset:8
-; GFX1132-NEXT: v_mov_b32_e32 v4, 0
-; GFX1132-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 0
-; GFX1132-NEXT: v_mov_b32_e32 v5, 8
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[4:5], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB11_4
-; GFX1132-NEXT: .LBB11_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB11_4
+; GFX1132-NEXT: .LBB11_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
-; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB11_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX9-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX9-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[42:43], s[42:43]
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX9-DPP-NEXT: .LBB11_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX9-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1064-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1064-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1064-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_max_f64 v[8:9], s[4:5], s[4:5]
-; GFX1064-DPP-NEXT: v_max_f64 v[10:11], s[2:3], s[2:3]
-; GFX1064-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1064-DPP-NEXT: .LBB11_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1064-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1032-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1032-DPP-NEXT: .LBB11_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1032-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1164-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1164-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1164-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1164-DPP-NEXT: .LBB11_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1164-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.double.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.double.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v9, 0x7ff80000
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX1132-DPP-NEXT: v_min_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[1:2], v[1:2]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2
-; GFX1132-DPP-NEXT: .LBB11_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB11_2
+; GFX1132-DPP-NEXT: .LBB11_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.double.value()
%result = atomicrmw fmin ptr addrspace(1) %ptr, double %divValue monotonic, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index cbe243a9491154..743a7d942c6279 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -6164,900 +6164,198 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x9
-; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX7LESS-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
-; GFX7LESS-NEXT: v_or_b32_e32 v4, v0, v4
-; GFX7LESS-NEXT: v_mul_f64 v[41:42], v[1:2], 4.0
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v4, v3
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2
-; GFX7LESS-NEXT: .LBB9_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX7LESS-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-NEXT: s_movk_i32 s32, 0x800
-; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB9_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-NEXT: v_mov_b32_e32 v3, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[0:1], v[3:4], -v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB9_2
-; GFX9-NEXT: .LBB9_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX9-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b64 s[8:9], exec
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, s9, v3
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1064-NEXT: .LBB9_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1064-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s8, exec_lo
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1032-NEXT: .LBB9_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1032-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b64 s[8:9], exec
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_mov_b32 s32, 32
+; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0
+; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1164-NEXT: .LBB9_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1164-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b32 s6, exec_lo
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_mov_b32 s32, 32
+; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_bcnt1_i32_b32 s0, s6
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1132-NEXT: .LBB9_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1132-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x9
-; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX7LESS-DPP-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v4, v0, v4
-; GFX7LESS-DPP-NEXT: v_mul_f64 v[41:42], v[1:2], 4.0
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v4, v3
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX7LESS-DPP-NEXT: .LBB9_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX7LESS-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[0:1], v[3:4], -v[41:42]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX9-DPP-NEXT: .LBB9_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX9-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b64 s[8:9], exec
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s9, v3
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1064-DPP-NEXT: .LBB9_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1064-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s8, exec_lo
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1032-DPP-NEXT: .LBB9_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1032-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b64 s[8:9], exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
+; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0
+; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1164-DPP-NEXT: .LBB9_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1164-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b32 s6, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
+; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, s6
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2
-; GFX1132-DPP-NEXT: .LBB9_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX1132-DPP-NEXT: .LBB9_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fsub ptr addrspace(1) %ptr, double 4.0 syncscope("agent") monotonic, align 4
ret void
@@ -7066,42 +6364,35 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe(ptr addrspace(1) %ptr) #0 {
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-NEXT: v_mov_b32_e32 v41, 0
-; GFX7LESS-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
+; GFX7LESS-NEXT: v_bfrev_b32_e32 v3, 1
; GFX7LESS-NEXT: .LBB10_1: ; %ComputeLoop
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1]
@@ -7111,7 +6402,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[4:5]
-; GFX7LESS-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX7LESS-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX7LESS-NEXT: s_cbranch_vccnz .LBB10_1
; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -7121,99 +6412,50 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4
-; GFX7LESS-NEXT: .LBB10_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB10_4
+; GFX7LESS-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v41, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
; GFX9-NEXT: .LBB10_1: ; %ComputeLoop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX9-NEXT: v_readlane_b32 s3, v1, s4
; GFX9-NEXT: v_readlane_b32 s2, v0, s4
-; GFX9-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -7226,96 +6468,50 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB10_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB10_4
-; GFX9-NEXT: .LBB10_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB10_4
+; GFX9-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v41, 0
-; GFX1064-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
-; GFX1064-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1064-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -7328,90 +6524,42 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB10_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1064-NEXT: .LBB10_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1064-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: v_mov_b32_e32 v41, 0
-; GFX1032-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -7420,90 +6568,42 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: v_readlane_b32 s2, v0, s1
; GFX1032-NEXT: s_lshl_b32 s1, 1, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
-; GFX1032-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1032-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1032-NEXT: s_cmp_lg_u32 s0, 0
; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB10_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1032-NEXT: .LBB10_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1032-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: v_mov_b32_e32 v41, 0
-; GFX1164-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1164-NEXT: v_mov_b32_e32 v2, 0
+; GFX1164-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -7512,7 +6612,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: v_readlane_b32 s3, v1, s4
; GFX1164-NEXT: v_readlane_b32 s2, v0, s4
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1164-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -7527,79 +6627,32 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB10_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1164-NEXT: .LBB10_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1164-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: v_mov_b32_e32 v41, 0
-; GFX1132-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1132-NEXT: v_mov_b32_e32 v2, 0
+; GFX1132-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB10_1: ; %ComputeLoop
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -7610,780 +6663,239 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_lshl_b32 s1, 1, s1
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1
-; GFX1132-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1132-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1132-NEXT: s_cmp_lg_u32 s0, 0
; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB10_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB10_4
-; GFX1132-NEXT: .LBB10_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB10_4
+; GFX1132-NEXT: .LBB10_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB10_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[42:43]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX9-DPP-NEXT: .LBB10_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX9-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], s[2:3], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1064-DPP-NEXT: .LBB10_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1064-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1032-DPP-NEXT: .LBB10_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1032-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1164-DPP-NEXT: .LBB10_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1164-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
-; GFX1132-DPP-NEXT: .LBB10_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB10_2
+; GFX1132-DPP-NEXT: .LBB10_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.float.value()
%result = atomicrmw fsub ptr addrspace(1) %ptr, double %divValue syncscope("agent") monotonic, align 4
@@ -12250,932 +10762,252 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp(ptr addrspace(1) %ptr) #2 {
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
-; GFX7LESS-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX7LESS-NEXT: s_mov_b32 s1, 0x43300000
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v5, exec_hi, v5
-; GFX7LESS-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX7LESS-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7LESS-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-NEXT: ; %bb.1:
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2
-; GFX7LESS-NEXT: .LBB16_3:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX7LESS-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX9-NEXT: s_mov_b32 s1, 0x43300000
-; GFX9-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
-; GFX9-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX9-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s14, -1
+; GFX9-NEXT: s_mov_b32 s15, 0xe00000
+; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-NEXT: s_add_u32 s12, s12, s9
+; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-NEXT: s_addc_u32 s13, s13, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB16_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB16_2
-; GFX9-NEXT: .LBB16_3:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX9-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1064-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1064-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s14, -1
+; GFX1064-NEXT: s_mov_b32 s15, 0x31e16000
+; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-NEXT: s_add_u32 s12, s12, s9
+; GFX1064-NEXT: s_addc_u32 s13, s13, 0
+; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1064-NEXT: .LBB16_3:
+; GFX1064-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1064-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1032-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-NEXT: s_mov_b32 s44, 0
-; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
-; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s14, -1
+; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
+; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1032-NEXT: s_add_u32 s12, s12, s9
+; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1032-NEXT: .LBB16_3:
+; GFX1032-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1032-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
+; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1164-NEXT: scratch_load_b64 v[0:1], off, off offset:16
+; GFX1164-NEXT: v_mov_b32_e32 v1, 0x43300000
+; GFX1164-NEXT: v_mov_b32_e32 v2, s0
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1164-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
-; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164-NEXT: s_clause 0x1
+; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1164-NEXT: scratch_store_b32 off, v2, off
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1164-NEXT: .LBB16_3:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1164-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
; GFX1132-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-NEXT: s_mov_b32 s44, 0
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1132-NEXT: scratch_load_b64 v[0:1], off, off offset:16
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0x43300000 :: v_dual_mov_b32 v2, s0
+; GFX1132-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-NEXT: s_clause 0x1
+; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1132-NEXT: scratch_store_b32 off, v2, off
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1132-NEXT: .LBB16_3:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1132-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
-; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX7LESS-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v5, exec_hi, v5
-; GFX7LESS-DPP-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX7LESS-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s15, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, s0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, s1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v3, v2
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX7LESS-DPP-NEXT: .LBB16_3:
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX7LESS-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000
-; GFX9-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], s[0:1], v[3:4]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX9-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX9-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s14, -1
+; GFX9-DPP-NEXT: s_mov_b32 s15, 0xe00000
+; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX9-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX9-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX9-DPP-NEXT: .LBB16_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX9-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1064-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s14, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s15, 0x31e16000
+; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
+; GFX1064-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s13, s13, 0
+; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1064-DPP-NEXT: .LBB16_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1064-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
-; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
-; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
+; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x24
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
-; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[42:43], 0x0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1032-DPP-NEXT: .LBB16_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1032-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
+; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1164-DPP-NEXT: scratch_load_b64 v[0:1], off, off offset:16
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0x43300000
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
-; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164-DPP-NEXT: s_clause 0x1
+; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1164-DPP-NEXT: scratch_store_b32 off, v2, off
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1164-DPP-NEXT: .LBB16_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1164-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
-; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
-; GFX1132-DPP-NEXT: scratch_load_b64 v[0:1], off, off offset:16
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[0:1]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0x43300000 :: v_dual_mov_b32 v2, s0
+; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132-DPP-NEXT: s_clause 0x1
+; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:4
+; GFX1132-DPP-NEXT: scratch_store_b32 off, v2, off
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[2:3], 0x24
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[42:43], 0x0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2
-; GFX1132-DPP-NEXT: .LBB16_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB16_2
+; GFX1132-DPP-NEXT: .LBB16_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%result = atomicrmw fsub ptr addrspace(1) %ptr, double 4.0 monotonic, align 4
ret void
@@ -13184,42 +11016,35 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp(ptr addrspace(1) %ptr) #2 {
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
-; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
-; GFX7LESS-NEXT: v_mov_b32_e32 v41, 0
-; GFX7LESS-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0
+; GFX7LESS-NEXT: v_bfrev_b32_e32 v3, 1
; GFX7LESS-NEXT: .LBB17_1: ; %ComputeLoop
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_ff1_i32_b64 s4, s[0:1]
@@ -13229,7 +11054,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX7LESS-NEXT: v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; GFX7LESS-NEXT: s_and_b64 vcc, exec, s[4:5]
-; GFX7LESS-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX7LESS-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX7LESS-NEXT: s_cbranch_vccnz .LBB17_1
; GFX7LESS-NEXT: ; %bb.2: ; %ComputeEnd
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -13239,99 +11064,50 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-NEXT: s_and_b64 vcc, exec, 0
; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4
-; GFX7LESS-NEXT: .LBB17_5:
+; GFX7LESS-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-NEXT: s_cbranch_vccz .LBB17_4
+; GFX7LESS-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s9
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-NEXT: s_mov_b32 s33, s8
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-NEXT: s_mov_b32 s40, s7
-; GFX9-NEXT: s_mov_b32 s41, s6
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s9
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_add_u32 s8, s2, 44
+; GFX9-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-NEXT: s_getpc_b64 s[2:3]
+; GFX9-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v41, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
; GFX9-NEXT: .LBB17_1: ; %ComputeLoop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX9-NEXT: v_readlane_b32 s3, v1, s4
; GFX9-NEXT: v_readlane_b32 s2, v0, s4
-; GFX9-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX9-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -13344,96 +11120,50 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB17_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[42:43], s[36:37], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[44:45], 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX9-NEXT: s_and_b64 vcc, exec, 0
; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_getpc_b64 s[0:1]
-; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s41
-; GFX9-NEXT: s_mov_b32 s13, s40
-; GFX9-NEXT: s_mov_b32 s14, s33
-; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s42
-; GFX9-NEXT: v_mov_b32_e32 v3, s43
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX9-NEXT: s_cbranch_execnz .LBB17_4
-; GFX9-NEXT: .LBB17_5:
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccz .LBB17_4
+; GFX9-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s33, s8
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-NEXT: s_mov_b32 s40, s7
-; GFX1064-NEXT: s_mov_b32 s41, s6
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_mov_b32 s14, s8
+; GFX1064-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b32 s12, s6
+; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-NEXT: s_mov_b32 s13, s7
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: v_mov_b32_e32 v41, 0
-; GFX1064-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_ff1_i32_b64 s4, s[0:1]
; GFX1064-NEXT: v_readlane_b32 s3, v1, s4
; GFX1064-NEXT: v_readlane_b32 s2, v0, s4
-; GFX1064-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1064-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1064-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cmp_lg_u64 s[0:1], 0
@@ -13446,90 +11176,42 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB17_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-NEXT: s_and_b64 vcc, exec, 0
; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s41
-; GFX1064-NEXT: s_mov_b32 s13, s40
-; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1064-NEXT: .LBB17_5:
+; GFX1064-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1064-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s33, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-NEXT: s_mov_b32 s40, s7
-; GFX1032-NEXT: s_mov_b32 s41, s6
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_movk_i32 s32, 0x400
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: v_mov_b32_e32 v41, 0
-; GFX1032-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_mov_b32 s14, s8
+; GFX1032-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX1032-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX1032-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b32 s12, s6
+; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-NEXT: s_mov_b32 s13, s7
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -13538,90 +11220,42 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: v_readlane_b32 s2, v0, s1
; GFX1032-NEXT: s_lshl_b32 s1, 1, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
-; GFX1032-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1032-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1032-NEXT: s_cmp_lg_u32 s0, 0
; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s44, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB17_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
; GFX1032-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s41
-; GFX1032-NEXT: s_mov_b32 s13, s40
-; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1032-NEXT: .LBB17_5:
+; GFX1032-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1032-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-NEXT: s_mov_b32 s33, s8
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-NEXT: s_mov_b32 s14, s8
+; GFX1164-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b32 s12, s6
; GFX1164-NEXT: s_mov_b32 s13, s7
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_mov_b32 s32, 32
-; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b32 s40, s7
-; GFX1164-NEXT: s_mov_b32 s41, s6
+; GFX1164-NEXT: s_mov_b32 s32, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: v_mov_b32_e32 v41, 0
-; GFX1164-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1164-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1164-NEXT: v_mov_b32_e32 v2, 0
+; GFX1164-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -13630,7 +11264,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: v_readlane_b32 s3, v1, s4
; GFX1164-NEXT: v_readlane_b32 s2, v0, s4
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1164-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1164-NEXT: s_lshl_b64 s[2:3], 1, s4
; GFX1164-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13645,79 +11279,32 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB17_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-NEXT: .p2align 6
+; GFX1164-NEXT: s_and_b64 vcc, exec, 0
; GFX1164-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s41
-; GFX1164-NEXT: s_mov_b32 s13, s40
-; GFX1164-NEXT: s_mov_b32 s14, s33
-; GFX1164-NEXT: s_clause 0x1
-; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1164-NEXT: .LBB17_5:
-; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1164-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1164-NEXT: s_endpgm
;
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b32 s40, s14
-; GFX1132-NEXT: s_mov_b32 s41, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
+; GFX1132-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b32 s13, s14
; GFX1132-NEXT: s_mov_b32 s14, s15
-; GFX1132-NEXT: s_mov_b32 s32, 32
-; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-NEXT: s_mov_b32 s32, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: v_mov_b32_e32 v41, 0
-; GFX1132-NEXT: v_bfrev_b32_e32 v42, 1
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX1132-NEXT: v_mov_b32_e32 v2, 0
+; GFX1132-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB17_1: ; %ComputeLoop
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -13728,780 +11315,239 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: s_lshl_b32 s1, 1, s1
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1132-NEXT: s_and_not1_b32 s0, s0, s1
-; GFX1132-NEXT: v_add_f64 v[41:42], v[41:42], s[2:3]
+; GFX1132-NEXT: v_add_f64 v[2:3], v[2:3], s[2:3]
; GFX1132-NEXT: s_cmp_lg_u32 s0, 0
; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s44, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB17_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-NEXT: .p2align 6
+; GFX1132-NEXT: s_mov_b32 vcc_lo, 0
; GFX1132-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1132-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s41
-; GFX1132-NEXT: s_mov_b32 s13, s40
-; GFX1132-NEXT: s_mov_b32 s14, s33
-; GFX1132-NEXT: s_clause 0x1
-; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-NEXT: s_cbranch_execnz .LBB17_4
-; GFX1132-NEXT: .LBB17_5:
-; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-NEXT: s_cbranch_vccz .LBB17_4
+; GFX1132-NEXT: .LBB17_5: ; %UnifiedReturnBlock
; GFX1132-NEXT: s_endpgm
;
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
-; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s8
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, s7
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, s6
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX7LESS-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX7LESS-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s8
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX7LESS-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX7LESS-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX7LESS-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX7LESS-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s6
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s7
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[42:43], 0
+; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX7LESS-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
-; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(2)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s41
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s40
-; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[42:43], vcc, s[42:43]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[42:43]
-; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1
-; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX7LESS-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX7LESS-DPP-NEXT: s_cbranch_vccnz .LBB17_1
+; GFX7LESS-DPP-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b32 s33, s8
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_mov_b32 s40, s7
-; GFX9-DPP-NEXT: s_mov_b32 s41, s6
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s14, s8
+; GFX9-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX9-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX9-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX9-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX9-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
+; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-DPP-NEXT: s_mov_b32 s12, s6
+; GFX9-DPP-NEXT: s_mov_b32 s13, s7
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX9-DPP-NEXT: s_not_b64 exec, exec
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:1 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-DPP-NEXT: s_nop 0
-; GFX9-DPP-NEXT: v_mov_b32_dpp v12, v10 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v13, v11 row_bcast:15 row_mask:0xa bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX9-DPP-NEXT: s_nop 1
-; GFX9-DPP-NEXT: v_mov_b32_dpp v8, v10 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_mov_b32_dpp v9, v11 row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX9-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s43, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s42, v8, 63
-; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[42:43]
-; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX9-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s41
-; GFX9-DPP-NEXT: s_mov_b32 s13, s40
-; GFX9-DPP-NEXT: s_mov_b32 s14, s33
-; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
-; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
-; GFX9-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX9-DPP-NEXT: .LBB17_3:
+; GFX9-DPP-NEXT: s_mov_b64 vcc, vcc
+; GFX9-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX9-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1064-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1064-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1064-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1064-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1064-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1064-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
+; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1064-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: s_not_b64 exec, exec
-; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1064-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1064-DPP-NEXT: v_readlane_b32 s3, v9, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s2, v8, 0
-; GFX1064-DPP-NEXT: v_readlane_b32 s5, v9, 32
-; GFX1064-DPP-NEXT: v_readlane_b32 s4, v8, 32
-; GFX1064-DPP-NEXT: v_add_f64 v[8:9], s[2:3], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1064-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[44:45]
-; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1064-DPP-NEXT: .LBB17_3:
+; GFX1064-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1064-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9
-; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1032-DPP-NEXT: s_mov_b32 s41, s6
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s9
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1032-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1032-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1032-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1032-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1032-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
+; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s6
+; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s7
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1032-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
+; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s44, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[42:43], s[34:35], 0x24
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[42:43]
+; GFX1032-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1032-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
-; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1032-DPP-NEXT: .LBB17_3:
+; GFX1032-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1032-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b32 s33, s8
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1164-DPP-NEXT: s_mov_b32 s14, s8
+; GFX1164-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1164-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1164-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1164-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1164-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1164-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s6
; GFX1164-DPP-NEXT: s_mov_b32 s13, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b32 s40, s7
-; GFX1164-DPP-NEXT: s_mov_b32 s41, s6
+; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1164-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: s_not_b64 exec, exec
-; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v12, v8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v13, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1164-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: v_permlane64_b32 v11, v9
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1164-DPP-NEXT: v_permlane64_b32 v10, v8
-; GFX1164-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v41, v8
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[44:45], 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1164-DPP-NEXT: .p2align 6
+; GFX1164-DPP-NEXT: s_and_b64 vcc, exec, -1
; GFX1164-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1164-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1164-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1164-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1164-DPP-NEXT: s_clause 0x1
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s42
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s43
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
-; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1164-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[44:45], vcc, s[44:45]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[44:45]
-; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1164-DPP-NEXT: .LBB17_3:
-; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1164-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1164-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1164-DPP-NEXT: s_endpgm
;
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[2:3]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, div.float.value at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_add_u32 s8, s2, 44
+; GFX1132-DPP-NEXT: s_addc_u32 s9, s3, 0
+; GFX1132-DPP-NEXT: s_getpc_b64 s[2:3]
+; GFX1132-DPP-NEXT: s_add_u32 s2, s2, div.float.value at gotpcrel32@lo+4
+; GFX1132-DPP-NEXT: s_addc_u32 s3, s3, div.float.value at gotpcrel32@hi+12
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b32 s40, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s41, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
+; GFX1132-DPP-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
-; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
+; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v8, 0
-; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v1
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v10, v8
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v11, v9
-; GFX1132-DPP-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:2 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v12, v8 :: v_dual_mov_b32 v13, v9
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v12, v10 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v13, v11 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_add_f64 v[10:11], v[10:11], v[12:13]
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v8, v10 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_mov_b32_dpp v9, v11 row_xmask:8 row_mask:0xf bank_mask:0xf
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[10:11], v[8:9]
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v11, v9 :: v_dual_mov_b32 v10, v8
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v11, v11, -1, -1
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1132-DPP-NEXT: v_permlanex16_b32 v10, v10, -1, -1
-; GFX1132-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
-; GFX1132-DPP-NEXT: s_mov_b32 exec_lo, s0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v41, v8
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s44, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[42:43], s[34:35], 0x24
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[42:43]
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
-; GFX1132-DPP-NEXT: .p2align 6
+; GFX1132-DPP-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1132-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1132-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX1132-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -v[41:42]
-; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
-; GFX1132-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
-; GFX1132-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
-; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[38:39]
-; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s41
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s40
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1132-DPP-NEXT: s_clause 0x1
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s42
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s43 :: v_dual_mov_b32 v4, 0
-; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
-; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s44, vcc_lo, s44
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s44
-; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2
-; GFX1132-DPP-NEXT: .LBB17_3:
-; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX1132-DPP-NEXT: s_cbranch_vccnz .LBB17_2
+; GFX1132-DPP-NEXT: .LBB17_3: ; %UnifiedReturnBlock
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call double @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, double %divValue monotonic, align 4
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index 161692137fc30b..b3e977b1af58e5 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -8452,8 +8452,8 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #88
+; CHECK-THUMB8BASE-NEXT: sub sp, #88
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -8504,33 +8504,42 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: beq .LBB40_1
+; CHECK-THUMB8BASE-NEXT: bne .LBB40_1
; CHECK-THUMB8BASE-NEXT: b .LBB40_8
; CHECK-THUMB8BASE-NEXT: .LBB40_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: add sp, #88
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw max ptr @atomic_i64, i64 1 monotonic
@@ -8756,8 +8765,8 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #88
+; CHECK-THUMB8BASE-NEXT: sub sp, #88
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -8808,33 +8817,42 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB41_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: beq .LBB41_1
+; CHECK-THUMB8BASE-NEXT: bne .LBB41_1
; CHECK-THUMB8BASE-NEXT: b .LBB41_8
; CHECK-THUMB8BASE-NEXT: .LBB41_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: add sp, #88
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw min ptr @atomic_i64, i64 1 monotonic
@@ -9060,8 +9078,8 @@ define i64 @test_umax_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #88
+; CHECK-THUMB8BASE-NEXT: sub sp, #88
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -9112,33 +9130,42 @@ define i64 @test_umax_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB42_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: beq .LBB42_1
+; CHECK-THUMB8BASE-NEXT: bne .LBB42_1
; CHECK-THUMB8BASE-NEXT: b .LBB42_8
; CHECK-THUMB8BASE-NEXT: .LBB42_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: add sp, #88
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw umax ptr @atomic_i64, i64 1 monotonic
@@ -9364,8 +9391,8 @@ define i64 @test_umin_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #88
+; CHECK-THUMB8BASE-NEXT: sub sp, #88
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -9416,33 +9443,42 @@ define i64 @test_umin_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB43_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: beq .LBB43_1
+; CHECK-THUMB8BASE-NEXT: bne .LBB43_1
; CHECK-THUMB8BASE-NEXT: b .LBB43_8
; CHECK-THUMB8BASE-NEXT: .LBB43_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: add sp, #88
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw umin ptr @atomic_i64, i64 1 monotonic
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
index db71eae97544db..f4d006723a7dbe 100644
--- a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -67,20 +67,25 @@ define i8 @rmw8(ptr %p) {
define i8 @cmpxchg8(ptr %p) {
; NO-ATOMIC32-LABEL: cmpxchg8:
; NO-ATOMIC32: @ %bb.0:
-; NO-ATOMIC32-NEXT: .save {r7, lr}
-; NO-ATOMIC32-NEXT: push {r7, lr}
-; NO-ATOMIC32-NEXT: .pad #8
-; NO-ATOMIC32-NEXT: sub sp, #8
-; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: movs r2, #0
+; NO-ATOMIC32-NEXT: .save {r4, lr}
+; NO-ATOMIC32-NEXT: push {r4, lr}
+; NO-ATOMIC32-NEXT: .pad #16
+; NO-ATOMIC32-NEXT: sub sp, #16
+; NO-ATOMIC32-NEXT: add r1, sp, #8
+; NO-ATOMIC32-NEXT: movs r2, #1
; NO-ATOMIC32-NEXT: strb r2, [r1]
+; NO-ATOMIC32-NEXT: add r4, sp, #12
+; NO-ATOMIC32-NEXT: movs r1, #0
+; NO-ATOMIC32-NEXT: strb r1, [r4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: mov r1, r4
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
-; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
-; NO-ATOMIC32-NEXT: add sp, #8
-; NO-ATOMIC32-NEXT: pop {r7, pc}
+; NO-ATOMIC32-NEXT: ldrb r0, [r4]
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: strb r0, [r1]
+; NO-ATOMIC32-NEXT: add sp, #16
+; NO-ATOMIC32-NEXT: pop {r4, pc}
;
; ATOMIC32-LABEL: cmpxchg8:
; ATOMIC32: @ %bb.0:
@@ -162,20 +167,25 @@ define i16 @rmw16(ptr %p) {
define i16 @cmpxchg16(ptr %p) {
; NO-ATOMIC32-LABEL: cmpxchg16:
; NO-ATOMIC32: @ %bb.0:
-; NO-ATOMIC32-NEXT: .save {r7, lr}
-; NO-ATOMIC32-NEXT: push {r7, lr}
-; NO-ATOMIC32-NEXT: .pad #8
-; NO-ATOMIC32-NEXT: sub sp, #8
-; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: movs r2, #0
+; NO-ATOMIC32-NEXT: .save {r4, lr}
+; NO-ATOMIC32-NEXT: push {r4, lr}
+; NO-ATOMIC32-NEXT: .pad #16
+; NO-ATOMIC32-NEXT: sub sp, #16
+; NO-ATOMIC32-NEXT: add r1, sp, #8
+; NO-ATOMIC32-NEXT: movs r2, #1
; NO-ATOMIC32-NEXT: strh r2, [r1]
+; NO-ATOMIC32-NEXT: add r4, sp, #12
+; NO-ATOMIC32-NEXT: movs r1, #0
+; NO-ATOMIC32-NEXT: strh r1, [r4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: mov r1, r4
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
-; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
-; NO-ATOMIC32-NEXT: add sp, #8
-; NO-ATOMIC32-NEXT: pop {r7, pc}
+; NO-ATOMIC32-NEXT: ldrh r0, [r4]
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: strh r0, [r1]
+; NO-ATOMIC32-NEXT: add sp, #16
+; NO-ATOMIC32-NEXT: pop {r4, pc}
;
; ATOMIC32-LABEL: cmpxchg16:
; ATOMIC32: @ %bb.0:
@@ -259,17 +269,19 @@ define i32 @cmpxchg32(ptr %p) {
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
-; NO-ATOMIC32-NEXT: .pad #8
-; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: .pad #16
+; NO-ATOMIC32-NEXT: sub sp, #16
+; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: str r2, [sp, #8]
; NO-ATOMIC32-NEXT: movs r1, #0
-; NO-ATOMIC32-NEXT: str r1, [sp, #4]
+; NO-ATOMIC32-NEXT: str r1, [sp, #12]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: add r1, sp, #12
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_4
-; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
-; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #12]
+; NO-ATOMIC32-NEXT: str r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #16
; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: cmpxchg32:
@@ -340,20 +352,24 @@ define i64 @cmpxchg64(ptr %p) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: str r3, [sp, #12]
-; CHECK-NEXT: str r3, [sp, #8]
+; CHECK-NEXT: str r3, [sp, #20]
+; CHECK-NEXT: movs r2, #1
+; CHECK-NEXT: str r2, [sp, #16]
+; CHECK-NEXT: str r3, [sp, #28]
+; CHECK-NEXT: str r3, [sp, #24]
; CHECK-NEXT: movs r1, #5
; CHECK-NEXT: str r1, [sp]
; CHECK-NEXT: str r1, [sp, #4]
-; CHECK-NEXT: add r1, sp, #8
-; CHECK-NEXT: movs r2, #1
+; CHECK-NEXT: add r1, sp, #24
; CHECK-NEXT: bl __atomic_compare_exchange_8
-; CHECK-NEXT: ldr r1, [sp, #12]
-; CHECK-NEXT: ldr r0, [sp, #8]
-; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: ldr r1, [sp, #28]
+; CHECK-NEXT: str r1, [sp, #12]
+; CHECK-NEXT: ldr r0, [sp, #24]
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: pop {r7, pc}
%res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
%res.0 = extractvalue { i64, i1 } %res, 0
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 193fa6c08600ae..afef1ea1070d23 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -297,11 +297,11 @@ define float @float_fmax_acquire(ptr %p) nounwind {
define double @double_fadd_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -311,24 +311,25 @@ define double @double_fadd_acquire(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 2
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB4_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB4_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fadd_acquire:
@@ -355,8 +356,10 @@ define double @double_fadd_acquire(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB4_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB4_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -370,11 +373,11 @@ define double @double_fadd_acquire(ptr %p) nounwind {
define double @double_fsub_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, -1025
@@ -384,24 +387,25 @@ define double @double_fsub_acquire(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 2
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB5_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB5_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fsub_acquire:
@@ -428,8 +432,10 @@ define double @double_fsub_acquire(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB5_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB5_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -443,11 +449,11 @@ define double @double_fsub_acquire(ptr %p) nounwind {
define double @double_fmin_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -457,24 +463,25 @@ define double @double_fmin_acquire(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmin)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 2
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB6_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB6_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmin_acquire:
@@ -502,8 +509,10 @@ define double @double_fmin_acquire(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB6_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB6_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -517,11 +526,11 @@ define double @double_fmin_acquire(ptr %p) nounwind {
define double @double_fmax_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fmax_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -531,24 +540,25 @@ define double @double_fmax_acquire(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmax)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 2
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB7_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB7_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmax_acquire:
@@ -576,8 +586,10 @@ define double @double_fmax_acquire(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB7_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB7_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -883,11 +895,11 @@ define float @float_fmax_release(ptr %p) nounwind {
define double @double_fadd_release(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_release:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -897,24 +909,25 @@ define double @double_fadd_release(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 3
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB12_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB12_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fadd_release:
@@ -941,8 +954,10 @@ define double @double_fadd_release(ptr %p) nounwind {
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB12_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB12_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -956,11 +971,11 @@ define double @double_fadd_release(ptr %p) nounwind {
define double @double_fsub_release(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_release:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, -1025
@@ -970,24 +985,25 @@ define double @double_fsub_release(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 3
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB13_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB13_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fsub_release:
@@ -1014,8 +1030,10 @@ define double @double_fsub_release(ptr %p) nounwind {
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB13_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB13_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -1029,11 +1047,11 @@ define double @double_fsub_release(ptr %p) nounwind {
define double @double_fmin_release(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_release:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -1043,24 +1061,25 @@ define double @double_fmin_release(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmin)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 3
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB14_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB14_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmin_release:
@@ -1088,8 +1107,10 @@ define double @double_fmin_release(ptr %p) nounwind {
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB14_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB14_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -1103,11 +1124,11 @@ define double @double_fmin_release(ptr %p) nounwind {
define double @double_fmax_release(ptr %p) nounwind {
; LA64F-LABEL: double_fmax_release:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -1117,24 +1138,25 @@ define double @double_fmax_release(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmax)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 3
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB15_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB15_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmax_release:
@@ -1162,8 +1184,10 @@ define double @double_fmax_release(ptr %p) nounwind {
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB15_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB15_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -1469,11 +1493,11 @@ define float @float_fmax_acq_rel(ptr %p) nounwind {
define double @double_fadd_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_acq_rel:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -1483,24 +1507,25 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 4
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB20_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB20_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fadd_acq_rel:
@@ -1527,8 +1552,10 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB20_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB20_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -1542,11 +1569,11 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
define double @double_fsub_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_acq_rel:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, -1025
@@ -1556,24 +1583,25 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 4
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB21_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB21_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fsub_acq_rel:
@@ -1600,8 +1628,10 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB21_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB21_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -1615,11 +1645,11 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
define double @double_fmin_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_acq_rel:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -1629,24 +1659,25 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmin)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 4
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB22_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB22_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmin_acq_rel:
@@ -1674,8 +1705,10 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB22_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB22_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -1689,11 +1722,11 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
define double @double_fmax_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: double_fmax_acq_rel:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -1703,24 +1736,25 @@ define double @double_fmax_acq_rel(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmax)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 4
; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB23_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB23_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmax_acq_rel:
@@ -1748,8 +1782,10 @@ define double @double_fmax_acq_rel(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB23_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB23_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2055,11 +2091,11 @@ define float @float_fmax_seq_cst(ptr %p) nounwind {
define double @double_fadd_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_seq_cst:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -2069,24 +2105,25 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 5
; LA64F-NEXT: ori $a5, $zero, 5
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB28_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB28_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fadd_seq_cst:
@@ -2113,8 +2150,10 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 5
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB28_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB28_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2128,11 +2167,11 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
define double @double_fsub_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_seq_cst:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, -1025
@@ -2142,24 +2181,25 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 5
; LA64F-NEXT: ori $a5, $zero, 5
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB29_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB29_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fsub_seq_cst:
@@ -2186,8 +2226,10 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 5
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB29_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB29_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2201,11 +2243,11 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
define double @double_fmin_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_seq_cst:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -2215,24 +2257,25 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmin)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 5
; LA64F-NEXT: ori $a5, $zero, 5
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB30_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB30_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmin_seq_cst:
@@ -2260,8 +2303,10 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 5
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB30_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB30_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2275,11 +2320,11 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
define double @double_fmax_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: double_fmax_seq_cst:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -2289,24 +2334,25 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmax)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: ori $a4, $zero, 5
; LA64F-NEXT: ori $a5, $zero, 5
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB31_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB31_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmax_seq_cst:
@@ -2334,8 +2380,10 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
; LA64D-NEXT: ori $a5, $zero, 5
; LA64D-NEXT: move $a1, $fp
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB31_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB31_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2641,11 +2689,11 @@ define float @float_fmax_monotonic(ptr %p) nounwind {
define double @double_fadd_monotonic(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_monotonic:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -2655,24 +2703,25 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a4, $zero
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB36_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB36_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fadd_monotonic:
@@ -2699,8 +2748,10 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
; LA64D-NEXT: move $a4, $zero
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB36_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB36_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2714,11 +2765,11 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
define double @double_fsub_monotonic(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_monotonic:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, -1025
@@ -2728,24 +2779,25 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a4, $zero
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB37_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB37_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fsub_monotonic:
@@ -2772,8 +2824,10 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
; LA64D-NEXT: move $a4, $zero
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB37_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB37_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2787,11 +2841,11 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
define double @double_fmin_monotonic(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_monotonic:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -2801,24 +2855,25 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmin)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a4, $zero
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB38_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB38_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmin_monotonic:
@@ -2846,8 +2901,10 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
; LA64D-NEXT: move $a4, $zero
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB38_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB38_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -2861,11 +2918,11 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
define double @double_fmax_monotonic(ptr %p) nounwind {
; LA64F-LABEL: double_fmax_monotonic:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -48
-; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -64
+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
; LA64F-NEXT: ld.d $s1, $a0, 0
; LA64F-NEXT: lu52i.d $s0, $zero, 1023
@@ -2875,24 +2932,25 @@ define double @double_fmax_monotonic(ptr %p) nounwind {
; LA64F-NEXT: move $a0, $s1
; LA64F-NEXT: move $a1, $s0
; LA64F-NEXT: bl %plt(fmax)
-; LA64F-NEXT: st.d $s1, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
+; LA64F-NEXT: st.d $s1, $sp, 24
+; LA64F-NEXT: st.d $a0, $sp, 16
; LA64F-NEXT: ori $a0, $zero, 8
-; LA64F-NEXT: addi.d $a2, $sp, 8
-; LA64F-NEXT: addi.d $a3, $sp, 0
+; LA64F-NEXT: addi.d $a2, $sp, 24
+; LA64F-NEXT: addi.d $a3, $sp, 16
; LA64F-NEXT: move $a1, $fp
; LA64F-NEXT: move $a4, $zero
; LA64F-NEXT: move $a5, $zero
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s1, $sp, 8
-; LA64F-NEXT: beqz $a0, .LBB39_1
+; LA64F-NEXT: ld.d $s1, $sp, 24
+; LA64F-NEXT: st.d $s1, $sp, 8
+; LA64F-NEXT: bnez $a0, .LBB39_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: move $a0, $s1
-; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 48
+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 64
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmax_monotonic:
@@ -2920,8 +2978,10 @@ define double @double_fmax_monotonic(ptr %p) nounwind {
; LA64D-NEXT: move $a4, $zero
; LA64D-NEXT: move $a5, $zero
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64D-NEXT: fld.d $fa0, $sp, 16
-; LA64D-NEXT: beqz $a0, .LBB39_1
+; LA64D-NEXT: ld.d $a1, $sp, 16
+; LA64D-NEXT: st.d $a1, $sp, 0
+; LA64D-NEXT: fld.d $fa0, $sp, 0
+; LA64D-NEXT: bnez $a0, .LBB39_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 093253bf8f6915..365fff55f69568 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-LABEL: test_op_ignore:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -160(1)
+; AIX32-NEXT: stwu 1, -256(1)
; AIX32-NEXT: lwz 3, L..C0(2) # @sc
-; AIX32-NEXT: stw 0, 168(1)
+; AIX32-NEXT: stw 0, 264(1)
; AIX32-NEXT: rlwinm 4, 3, 3, 27, 28
-; AIX32-NEXT: stw 15, 92(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 26, 136(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 28, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 15, 188(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 26, 232(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 28, 240(1) # 4-byte Folded Spill
; AIX32-NEXT: li 15, 1
; AIX32-NEXT: rlwinm 28, 3, 0, 0, 29
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: xori 26, 4, 24
-; AIX32-NEXT: stw 16, 96(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 17, 100(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 18, 104(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 19, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 20, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 21, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 22, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 23, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 24, 128(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 25, 132(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 27, 140(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 29, 148(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 30, 152(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 156(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 16, 192(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 17, 196(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 18, 200(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 19, 204(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 208(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 21, 212(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 22, 216(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 220(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 224(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 25, 228(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 27, 236(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 29, 244(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 30, 248(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 31, 252(1) # 4-byte Folded Spill
; AIX32-NEXT: sync
; AIX32-NEXT: slw 29, 15, 26
; AIX32-NEXT: slw 3, 3, 26
@@ -906,72 +906,94 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: bl .__atomic_fetch_xor_8[PR]
; AIX32-NEXT: nop
; AIX32-NEXT: lwz 31, L..C8(2) # @u128
-; AIX32-NEXT: addi 30, 1, 72
-; AIX32-NEXT: addi 29, 1, 56
-; AIX32-NEXT: lwz 5, 12(31)
-; AIX32-NEXT: lwz 4, 8(31)
-; AIX32-NEXT: lwz 6, 4(31)
-; AIX32-NEXT: lwz 7, 0(31)
+; AIX32-NEXT: addi 30, 1, 120
+; AIX32-NEXT: addi 29, 1, 104
+; AIX32-NEXT: lwz 4, 12(31)
+; AIX32-NEXT: lvsl 2, 0, 30
+; AIX32-NEXT: lwz 3, 8(31)
+; AIX32-NEXT: lwz 5, 4(31)
+; AIX32-NEXT: lwz 6, 0(31)
+; AIX32-NEXT: li 18, 15
+; AIX32-NEXT: li 7, 64
+; AIX32-NEXT: addi 17, 1, 88
+; AIX32-NEXT: stxvd2x 34, 1, 7 # 16-byte Folded Spill
; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB0_49: # %atomicrmw.start
; AIX32-NEXT: #
-; AIX32-NEXT: xori 3, 5, 1
-; AIX32-NEXT: stw 7, 72(1)
-; AIX32-NEXT: stw 7, 56(1)
-; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: xori 7, 4, 1
+; AIX32-NEXT: stw 3, 128(1)
+; AIX32-NEXT: stw 3, 112(1)
; AIX32-NEXT: li 3, 16
+; AIX32-NEXT: stw 7, 116(1)
+; AIX32-NEXT: li 7, 5
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 6, 76(1)
-; AIX32-NEXT: stw 4, 80(1)
-; AIX32-NEXT: stw 5, 84(1)
-; AIX32-NEXT: stw 4, 64(1)
-; AIX32-NEXT: stw 6, 60(1)
+; AIX32-NEXT: stw 6, 120(1)
+; AIX32-NEXT: stw 5, 124(1)
+; AIX32-NEXT: stw 4, 132(1)
+; AIX32-NEXT: stw 6, 104(1)
+; AIX32-NEXT: stw 5, 108(1)
; AIX32-NEXT: mr 4, 31
; AIX32-NEXT: mr 5, 30
; AIX32-NEXT: mr 6, 29
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 5, 84(1)
-; AIX32-NEXT: lwz 4, 80(1)
-; AIX32-NEXT: lwz 6, 76(1)
-; AIX32-NEXT: lwz 7, 72(1)
-; AIX32-NEXT: cmplwi 3, 0
-; AIX32-NEXT: beq 0, L..BB0_49
+; AIX32-NEXT: li 4, 64
+; AIX32-NEXT: lvx 2, 30, 18
+; AIX32-NEXT: lvx 3, 0, 30
+; AIX32-NEXT: andi. 3, 3, 255
+; AIX32-NEXT: lxvd2x 36, 1, 4 # 16-byte Folded Reload
+; AIX32-NEXT: vperm 2, 3, 2, 4
+; AIX32-NEXT: stxvw4x 34, 0, 17
+; AIX32-NEXT: lwz 4, 100(1)
+; AIX32-NEXT: lwz 3, 96(1)
+; AIX32-NEXT: lwz 5, 92(1)
+; AIX32-NEXT: lwz 6, 88(1)
+; AIX32-NEXT: bne 0, L..BB0_49
; AIX32-NEXT: # %bb.50: # %atomicrmw.end
; AIX32-NEXT: lwz 31, L..C9(2) # @s128
-; AIX32-NEXT: addi 30, 1, 72
-; AIX32-NEXT: addi 29, 1, 56
-; AIX32-NEXT: lwz 5, 12(31)
-; AIX32-NEXT: lwz 4, 8(31)
-; AIX32-NEXT: lwz 6, 4(31)
-; AIX32-NEXT: lwz 7, 0(31)
+; AIX32-NEXT: addi 30, 1, 168
+; AIX32-NEXT: addi 29, 1, 152
+; AIX32-NEXT: lwz 4, 12(31)
+; AIX32-NEXT: lvsl 2, 0, 30
+; AIX32-NEXT: lwz 3, 8(31)
+; AIX32-NEXT: lwz 5, 4(31)
+; AIX32-NEXT: lwz 6, 0(31)
+; AIX32-NEXT: li 18, 15
+; AIX32-NEXT: li 7, 64
+; AIX32-NEXT: addi 17, 1, 136
+; AIX32-NEXT: stxvd2x 34, 1, 7 # 16-byte Folded Spill
; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB0_51: # %atomicrmw.start2
; AIX32-NEXT: #
-; AIX32-NEXT: xori 3, 5, 1
-; AIX32-NEXT: stw 7, 72(1)
-; AIX32-NEXT: stw 7, 56(1)
-; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: xori 7, 4, 1
+; AIX32-NEXT: stw 3, 176(1)
+; AIX32-NEXT: stw 3, 160(1)
; AIX32-NEXT: li 3, 16
+; AIX32-NEXT: stw 7, 164(1)
+; AIX32-NEXT: li 7, 5
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 6, 76(1)
-; AIX32-NEXT: stw 4, 80(1)
-; AIX32-NEXT: stw 5, 84(1)
-; AIX32-NEXT: stw 4, 64(1)
-; AIX32-NEXT: stw 6, 60(1)
+; AIX32-NEXT: stw 6, 168(1)
+; AIX32-NEXT: stw 5, 172(1)
+; AIX32-NEXT: stw 4, 180(1)
+; AIX32-NEXT: stw 6, 152(1)
+; AIX32-NEXT: stw 5, 156(1)
; AIX32-NEXT: mr 4, 31
; AIX32-NEXT: mr 5, 30
; AIX32-NEXT: mr 6, 29
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 5, 84(1)
-; AIX32-NEXT: lwz 4, 80(1)
-; AIX32-NEXT: lwz 6, 76(1)
-; AIX32-NEXT: lwz 7, 72(1)
-; AIX32-NEXT: cmplwi 3, 0
-; AIX32-NEXT: beq 0, L..BB0_51
+; AIX32-NEXT: li 4, 64
+; AIX32-NEXT: lvx 2, 30, 18
+; AIX32-NEXT: lvx 3, 0, 30
+; AIX32-NEXT: andi. 3, 3, 255
+; AIX32-NEXT: lxvd2x 36, 1, 4 # 16-byte Folded Reload
+; AIX32-NEXT: vperm 2, 3, 2, 4
+; AIX32-NEXT: stxvw4x 34, 0, 17
+; AIX32-NEXT: lwz 4, 148(1)
+; AIX32-NEXT: lwz 3, 144(1)
+; AIX32-NEXT: lwz 5, 140(1)
+; AIX32-NEXT: lwz 6, 136(1)
+; AIX32-NEXT: bne 0, L..BB0_51
; AIX32-NEXT: # %bb.52: # %atomicrmw.end1
; AIX32-NEXT: li 29, 1
; AIX32-NEXT: li 3, 255
@@ -1156,24 +1178,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: li 6, 5
; AIX32-NEXT: bl .__atomic_fetch_and_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 31, 156(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 30, 152(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 148(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 144(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 27, 140(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 26, 136(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 25, 132(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 24, 128(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 23, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 22, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 21, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 20, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 19, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 18, 104(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 17, 100(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 16, 96(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 15, 92(1) # 4-byte Folded Reload
-; AIX32-NEXT: addi 1, 1, 160
+; AIX32-NEXT: lwz 31, 252(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 30, 248(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 244(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 240(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 27, 236(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 232(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 25, 228(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 24, 224(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 23, 220(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 22, 216(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 21, 212(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 20, 208(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 19, 204(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 18, 200(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 17, 196(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 16, 192(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 15, 188(1) # 4-byte Folded Reload
+; AIX32-NEXT: addi 1, 1, 256
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
@@ -3185,33 +3207,33 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-LABEL: test_op_and_fetch:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -176(1)
-; AIX32-NEXT: stw 0, 184(1)
-; AIX32-NEXT: stw 27, 156(1) # 4-byte Folded Spill
+; AIX32-NEXT: stwu 1, -272(1)
+; AIX32-NEXT: stw 0, 280(1)
+; AIX32-NEXT: stw 27, 252(1) # 4-byte Folded Spill
; AIX32-NEXT: lwz 27, L..C0(2) # @sc
-; AIX32-NEXT: stw 26, 152(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 26, 248(1) # 4-byte Folded Spill
; AIX32-NEXT: lwz 26, L..C1(2) # @uc
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: rlwinm 4, 27, 3, 27, 28
-; AIX32-NEXT: stw 24, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 240(1) # 4-byte Folded Spill
; AIX32-NEXT: li 5, 255
-; AIX32-NEXT: stw 13, 100(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 14, 104(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 15, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 13, 196(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 14, 200(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 15, 204(1) # 4-byte Folded Spill
; AIX32-NEXT: xori 24, 4, 24
-; AIX32-NEXT: stw 16, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 17, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 18, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 19, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 20, 128(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 21, 132(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 22, 136(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 23, 140(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 25, 148(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 28, 160(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 29, 164(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 30, 168(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 172(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 16, 208(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 17, 212(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 18, 216(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 19, 220(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 224(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 21, 228(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 22, 232(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 236(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 25, 244(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 28, 256(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 29, 260(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 30, 264(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 31, 268(1) # 4-byte Folded Spill
; AIX32-NEXT: li 17, -1
; AIX32-NEXT: sync
; AIX32-NEXT: rlwinm 22, 27, 0, 0, 29
@@ -3819,8 +3841,10 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 4, 0, 13
; AIX32-NEXT: bne 0, L..BB2_57
; AIX32-NEXT: # %bb.58: # %entry
-; AIX32-NEXT: stw 23, 56(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 27, 60(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 64(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 68(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 72(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 27, 76(1) # 4-byte Folded Spill
; AIX32-NEXT: lwsync
; AIX32-NEXT: stw 4, 0(13)
; AIX32-NEXT: lbz 3, 0(26)
@@ -3853,96 +3877,119 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: nop
; AIX32-NEXT: nand 3, 4, 29
; AIX32-NEXT: lwz 29, L..C8(2) # @u128
+; AIX32-NEXT: addi 28, 1, 128
; AIX32-NEXT: lbz 23, 0(26)
-; AIX32-NEXT: addi 28, 1, 80
-; AIX32-NEXT: addi 27, 1, 64
-; AIX32-NEXT: stw 17, 0(30)
-; AIX32-NEXT: lwz 4, 12(29)
-; AIX32-NEXT: lwz 5, 8(29)
-; AIX32-NEXT: lwz 6, 4(29)
-; AIX32-NEXT: lwz 7, 0(29)
+; AIX32-NEXT: addi 27, 1, 112
+; AIX32-NEXT: li 20, 15
+; AIX32-NEXT: lwz 4, 8(29)
+; AIX32-NEXT: lwz 5, 4(29)
+; AIX32-NEXT: lvsl 2, 0, 28
; AIX32-NEXT: stw 3, 4(30)
+; AIX32-NEXT: lwz 3, 12(29)
+; AIX32-NEXT: lwz 6, 0(29)
+; AIX32-NEXT: li 7, 80
+; AIX32-NEXT: addi 24, 1, 96
+; AIX32-NEXT: stw 17, 0(30)
+; AIX32-NEXT: stxvd2x 34, 1, 7 # 16-byte Folded Spill
; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB2_61: # %atomicrmw.start
; AIX32-NEXT: #
-; AIX32-NEXT: and 3, 4, 23
-; AIX32-NEXT: stw 7, 80(1)
-; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: and 7, 3, 23
+; AIX32-NEXT: stw 3, 140(1)
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: xor 3, 3, 17
-; AIX32-NEXT: stw 6, 84(1)
-; AIX32-NEXT: stw 5, 88(1)
-; AIX32-NEXT: stw 4, 92(1)
-; AIX32-NEXT: mr 4, 29
-; AIX32-NEXT: mr 5, 28
+; AIX32-NEXT: stw 6, 128(1)
; AIX32-NEXT: mr 6, 27
-; AIX32-NEXT: stw 3, 76(1)
+; AIX32-NEXT: xor 3, 7, 17
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: stw 5, 132(1)
+; AIX32-NEXT: stw 4, 136(1)
+; AIX32-NEXT: stw 3, 124(1)
; AIX32-NEXT: li 3, 16
-; AIX32-NEXT: stw 17, 72(1)
-; AIX32-NEXT: stw 17, 68(1)
-; AIX32-NEXT: stw 17, 64(1)
+; AIX32-NEXT: mr 4, 29
+; AIX32-NEXT: mr 5, 28
+; AIX32-NEXT: stw 17, 112(1)
+; AIX32-NEXT: stw 17, 116(1)
+; AIX32-NEXT: stw 17, 120(1)
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 4, 92(1)
-; AIX32-NEXT: lwz 5, 88(1)
-; AIX32-NEXT: lwz 6, 84(1)
-; AIX32-NEXT: lwz 7, 80(1)
-; AIX32-NEXT: cmplwi 3, 0
-; AIX32-NEXT: beq 0, L..BB2_61
+; AIX32-NEXT: li 4, 80
+; AIX32-NEXT: lvx 2, 28, 20
+; AIX32-NEXT: lvx 3, 0, 28
+; AIX32-NEXT: andi. 3, 3, 255
+; AIX32-NEXT: lxvd2x 36, 1, 4 # 16-byte Folded Reload
+; AIX32-NEXT: vperm 2, 3, 2, 4
+; AIX32-NEXT: stxvw4x 34, 0, 24
+; AIX32-NEXT: lwz 3, 108(1)
+; AIX32-NEXT: lwz 4, 104(1)
+; AIX32-NEXT: lwz 5, 100(1)
+; AIX32-NEXT: lwz 6, 96(1)
+; AIX32-NEXT: bne 0, L..BB2_61
; AIX32-NEXT: # %bb.62: # %atomicrmw.end
-; AIX32-NEXT: and 3, 4, 23
+; AIX32-NEXT: and 3, 3, 23
; AIX32-NEXT: stw 17, 0(29)
-; AIX32-NEXT: lbz 23, 0(26)
+; AIX32-NEXT: lwz 28, L..C9(2) # @s128
; AIX32-NEXT: stw 17, 4(29)
; AIX32-NEXT: stw 17, 8(29)
-; AIX32-NEXT: xor 3, 3, 17
-; AIX32-NEXT: addi 28, 1, 80
-; AIX32-NEXT: addi 27, 1, 64
-; AIX32-NEXT: stw 3, 12(29)
-; AIX32-NEXT: lwz 29, L..C9(2) # @s128
-; AIX32-NEXT: lwz 4, 12(29)
-; AIX32-NEXT: lwz 5, 8(29)
-; AIX32-NEXT: lwz 6, 4(29)
-; AIX32-NEXT: lwz 7, 0(29)
+; AIX32-NEXT: xor 6, 3, 17
+; AIX32-NEXT: lbz 23, 0(26)
+; AIX32-NEXT: addi 27, 1, 160
+; AIX32-NEXT: lwz 3, 12(28)
+; AIX32-NEXT: lwz 4, 8(28)
+; AIX32-NEXT: lwz 5, 4(28)
+; AIX32-NEXT: li 20, 15
+; AIX32-NEXT: li 7, 80
+; AIX32-NEXT: stw 6, 12(29)
+; AIX32-NEXT: addi 29, 1, 176
+; AIX32-NEXT: lwz 6, 0(28)
+; AIX32-NEXT: addi 24, 1, 144
+; AIX32-NEXT: lvsl 2, 0, 29
+; AIX32-NEXT: stxvd2x 34, 1, 7 # 16-byte Folded Spill
; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB2_63: # %atomicrmw.start2
; AIX32-NEXT: #
-; AIX32-NEXT: and 3, 4, 23
-; AIX32-NEXT: stw 7, 80(1)
-; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: and 7, 3, 23
+; AIX32-NEXT: stw 3, 188(1)
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: xor 3, 3, 17
-; AIX32-NEXT: stw 6, 84(1)
-; AIX32-NEXT: stw 5, 88(1)
-; AIX32-NEXT: stw 4, 92(1)
-; AIX32-NEXT: mr 4, 29
-; AIX32-NEXT: mr 5, 28
+; AIX32-NEXT: stw 6, 176(1)
; AIX32-NEXT: mr 6, 27
-; AIX32-NEXT: stw 3, 76(1)
+; AIX32-NEXT: xor 3, 7, 17
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: stw 5, 180(1)
+; AIX32-NEXT: stw 4, 184(1)
+; AIX32-NEXT: stw 3, 172(1)
; AIX32-NEXT: li 3, 16
-; AIX32-NEXT: stw 17, 72(1)
-; AIX32-NEXT: stw 17, 68(1)
-; AIX32-NEXT: stw 17, 64(1)
+; AIX32-NEXT: mr 4, 28
+; AIX32-NEXT: mr 5, 29
+; AIX32-NEXT: stw 17, 160(1)
+; AIX32-NEXT: stw 17, 164(1)
+; AIX32-NEXT: stw 17, 168(1)
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 4, 92(1)
-; AIX32-NEXT: lwz 5, 88(1)
-; AIX32-NEXT: lwz 6, 84(1)
-; AIX32-NEXT: lwz 7, 80(1)
-; AIX32-NEXT: cmplwi 3, 0
-; AIX32-NEXT: beq 0, L..BB2_63
+; AIX32-NEXT: li 4, 80
+; AIX32-NEXT: lvx 2, 29, 20
+; AIX32-NEXT: lvx 3, 0, 29
+; AIX32-NEXT: andi. 3, 3, 255
+; AIX32-NEXT: lxvd2x 36, 1, 4 # 16-byte Folded Reload
+; AIX32-NEXT: vperm 2, 3, 2, 4
+; AIX32-NEXT: stxvw4x 34, 0, 24
+; AIX32-NEXT: lwz 3, 156(1)
+; AIX32-NEXT: lwz 4, 152(1)
+; AIX32-NEXT: lwz 5, 148(1)
+; AIX32-NEXT: lwz 6, 144(1)
+; AIX32-NEXT: bne 0, L..BB2_63
; AIX32-NEXT: # %bb.64: # %atomicrmw.end1
-; AIX32-NEXT: and 3, 4, 23
-; AIX32-NEXT: li 5, 255
+; AIX32-NEXT: and 3, 3, 23
+; AIX32-NEXT: stw 17, 0(28)
+; AIX32-NEXT: stw 17, 4(28)
+; AIX32-NEXT: stw 17, 8(28)
; AIX32-NEXT: xor 3, 3, 17
-; AIX32-NEXT: stw 17, 0(29)
-; AIX32-NEXT: stw 17, 4(29)
-; AIX32-NEXT: stw 17, 8(29)
-; AIX32-NEXT: slw 5, 5, 24
-; AIX32-NEXT: stw 3, 12(29)
+; AIX32-NEXT: li 5, 255
+; AIX32-NEXT: stw 3, 12(28)
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
-; AIX32-NEXT: slw 4, 3, 24
+; AIX32-NEXT: lwz 9, 72(1) # 4-byte Folded Reload
+; AIX32-NEXT: slw 4, 3, 9
+; AIX32-NEXT: slw 5, 5, 9
; AIX32-NEXT: L..BB2_65: # %atomicrmw.end1
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 6, 0, 22
@@ -3953,17 +4000,18 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 7, 0, 22
; AIX32-NEXT: bne 0, L..BB2_65
; AIX32-NEXT: # %bb.66: # %atomicrmw.end1
-; AIX32-NEXT: srw 4, 6, 24
+; AIX32-NEXT: srw 4, 6, 9
; AIX32-NEXT: lwsync
; AIX32-NEXT: li 5, 255
; AIX32-NEXT: clrlwi 4, 4, 24
; AIX32-NEXT: slw 5, 5, 21
; AIX32-NEXT: and 3, 4, 3
-; AIX32-NEXT: lwz 4, 60(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 4, 76(1) # 4-byte Folded Reload
; AIX32-NEXT: stb 3, 0(4)
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
-; AIX32-NEXT: lwz 9, 56(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 9, 68(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 10, 64(1) # 4-byte Folded Reload
; AIX32-NEXT: slw 4, 3, 21
; AIX32-NEXT: L..BB2_67: # %atomicrmw.end1
; AIX32-NEXT: #
@@ -4020,7 +4068,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lwsync
; AIX32-NEXT: clrlwi 4, 4, 16
; AIX32-NEXT: and 3, 4, 3
-; AIX32-NEXT: sth 3, 0(20)
+; AIX32-NEXT: sth 3, 0(10)
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
; AIX32-NEXT: L..BB2_73: # %atomicrmw.end1
@@ -4063,27 +4111,27 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: nop
; AIX32-NEXT: and 3, 4, 31
; AIX32-NEXT: stw 28, 0(30)
-; AIX32-NEXT: lwz 31, 172(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 164(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 160(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 27, 156(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 26, 152(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 25, 148(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 24, 144(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 31, 268(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 260(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 256(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 27, 252(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 248(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 25, 244(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 24, 240(1) # 4-byte Folded Reload
; AIX32-NEXT: stw 3, 4(30)
-; AIX32-NEXT: lwz 30, 168(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 23, 140(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 22, 136(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 21, 132(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 20, 128(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 19, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 18, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 17, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 16, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 15, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 14, 104(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 13, 100(1) # 4-byte Folded Reload
-; AIX32-NEXT: addi 1, 1, 176
+; AIX32-NEXT: lwz 30, 264(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 23, 236(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 22, 232(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 21, 228(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 20, 224(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 19, 220(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 18, 216(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 17, 212(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 16, 208(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 15, 204(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 14, 200(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 13, 196(1) # 4-byte Folded Reload
+; AIX32-NEXT: addi 1, 1, 272
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
@@ -4639,33 +4687,33 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-LABEL: test_compare_and_swap:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -128(1)
-; AIX32-NEXT: stw 0, 136(1)
-; AIX32-NEXT: stw 28, 112(1) # 4-byte Folded Spill
+; AIX32-NEXT: stwu 1, -224(1)
+; AIX32-NEXT: stw 0, 232(1)
+; AIX32-NEXT: stw 28, 208(1) # 4-byte Folded Spill
; AIX32-NEXT: lwz 28, L..C0(2) # @sc
-; AIX32-NEXT: stw 29, 116(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 29, 212(1) # 4-byte Folded Spill
; AIX32-NEXT: lwz 29, L..C1(2) # @uc
; AIX32-NEXT: lbz 3, 0(29)
; AIX32-NEXT: rlwinm 5, 28, 3, 27, 28
-; AIX32-NEXT: stw 21, 84(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 21, 180(1) # 4-byte Folded Spill
; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 17, 68(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 18, 72(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 19, 76(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 20, 80(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 17, 164(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 18, 168(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 19, 172(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 176(1) # 4-byte Folded Spill
; AIX32-NEXT: xori 21, 5, 24
-; AIX32-NEXT: stw 22, 88(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 23, 92(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 24, 96(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 22, 184(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 188(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 192(1) # 4-byte Folded Spill
; AIX32-NEXT: slw 5, 3, 21
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: slw 4, 4, 21
-; AIX32-NEXT: stw 25, 100(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 26, 104(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 25, 196(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 26, 200(1) # 4-byte Folded Spill
; AIX32-NEXT: slw 3, 3, 21
-; AIX32-NEXT: stw 27, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 30, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 124(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 27, 204(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 30, 216(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 31, 220(1) # 4-byte Folded Spill
; AIX32-NEXT: sync
; AIX32-NEXT: rlwinm 18, 28, 0, 0, 29
; AIX32-NEXT: and 4, 4, 3
@@ -4688,13 +4736,13 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lwsync
; AIX32-NEXT: lbz 4, 0(29)
; AIX32-NEXT: rlwinm 20, 29, 0, 0, 29
-; AIX32-NEXT: xori 25, 5, 24
-; AIX32-NEXT: slw 5, 3, 25
+; AIX32-NEXT: xori 24, 5, 24
+; AIX32-NEXT: slw 5, 3, 24
; AIX32-NEXT: stb 3, 0(28)
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: sync
-; AIX32-NEXT: slw 6, 4, 25
-; AIX32-NEXT: slw 3, 3, 25
+; AIX32-NEXT: slw 6, 4, 24
+; AIX32-NEXT: slw 3, 3, 24
; AIX32-NEXT: and 4, 5, 3
; AIX32-NEXT: and 5, 6, 3
; AIX32-NEXT: L..BB3_4: # %entry
@@ -4711,7 +4759,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: bne 0, L..BB3_4
; AIX32-NEXT: L..BB3_6: # %entry
; AIX32-NEXT: lwsync
-; AIX32-NEXT: srw 4, 6, 25
+; AIX32-NEXT: srw 4, 6, 24
; AIX32-NEXT: lbz 3, 0(28)
; AIX32-NEXT: extsb 5, 3
; AIX32-NEXT: lwz 3, L..C2(2) # @ss
@@ -4719,12 +4767,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: sync
; AIX32-NEXT: rlwinm 6, 3, 3, 27, 27
; AIX32-NEXT: rlwinm 22, 3, 0, 0, 29
-; AIX32-NEXT: xori 26, 6, 16
-; AIX32-NEXT: slw 6, 4, 26
+; AIX32-NEXT: xori 25, 6, 16
+; AIX32-NEXT: slw 6, 4, 25
; AIX32-NEXT: li 4, 0
-; AIX32-NEXT: slw 5, 5, 26
+; AIX32-NEXT: slw 5, 5, 25
; AIX32-NEXT: ori 4, 4, 65535
-; AIX32-NEXT: slw 4, 4, 26
+; AIX32-NEXT: slw 4, 4, 25
; AIX32-NEXT: and 5, 5, 4
; AIX32-NEXT: and 6, 6, 4
; AIX32-NEXT: L..BB3_7: # %entry
@@ -4740,7 +4788,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 8, 0, 22
; AIX32-NEXT: bne 0, L..BB3_7
; AIX32-NEXT: L..BB3_9: # %entry
-; AIX32-NEXT: srw 4, 7, 26
+; AIX32-NEXT: srw 4, 7, 25
; AIX32-NEXT: lwsync
; AIX32-NEXT: sth 4, 0(3)
; AIX32-NEXT: lbz 3, 0(28)
@@ -4750,12 +4798,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 3, L..C3(2) # @us
; AIX32-NEXT: rlwinm 6, 3, 3, 27, 27
; AIX32-NEXT: rlwinm 19, 3, 0, 0, 29
-; AIX32-NEXT: xori 24, 6, 16
-; AIX32-NEXT: slw 6, 4, 24
+; AIX32-NEXT: xori 23, 6, 16
+; AIX32-NEXT: slw 6, 4, 23
; AIX32-NEXT: li 4, 0
-; AIX32-NEXT: slw 5, 5, 24
+; AIX32-NEXT: slw 5, 5, 23
; AIX32-NEXT: ori 4, 4, 65535
-; AIX32-NEXT: slw 4, 4, 24
+; AIX32-NEXT: slw 4, 4, 23
; AIX32-NEXT: and 5, 5, 4
; AIX32-NEXT: and 6, 6, 4
; AIX32-NEXT: L..BB3_10: # %entry
@@ -4771,7 +4819,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 8, 0, 19
; AIX32-NEXT: bne 0, L..BB3_10
; AIX32-NEXT: L..BB3_12: # %entry
-; AIX32-NEXT: srw 4, 7, 24
+; AIX32-NEXT: srw 4, 7, 23
; AIX32-NEXT: lwsync
; AIX32-NEXT: lwz 17, L..C4(2) # @si
; AIX32-NEXT: sth 4, 0(3)
@@ -4807,48 +4855,56 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: bne 0, L..BB3_16
; AIX32-NEXT: L..BB3_18: # %entry
; AIX32-NEXT: lwsync
-; AIX32-NEXT: lwz 31, L..C6(2) # @sll
+; AIX32-NEXT: lwz 30, L..C6(2) # @sll
; AIX32-NEXT: stw 5, 0(27)
-; AIX32-NEXT: lbz 3, 0(28)
-; AIX32-NEXT: li 23, 0
-; AIX32-NEXT: addi 4, 1, 56
+; AIX32-NEXT: lbz 4, 0(28)
+; AIX32-NEXT: lbz 3, 0(29)
+; AIX32-NEXT: li 26, 0
; AIX32-NEXT: li 7, 5
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 23, 56(1)
-; AIX32-NEXT: extsb 6, 3
-; AIX32-NEXT: lbz 3, 0(29)
+; AIX32-NEXT: stw 26, 80(1)
+; AIX32-NEXT: extsb 6, 4
+; AIX32-NEXT: addi 4, 1, 80
; AIX32-NEXT: srawi 5, 6, 31
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 31
+; AIX32-NEXT: stw 3, 84(1)
+; AIX32-NEXT: mr 3, 30
+; AIX32-NEXT: stw 6, 76(1)
+; AIX32-NEXT: stw 5, 72(1)
; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 3, 60(1)
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: lwz 30, L..C7(2) # @ull
-; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: lbz 3, 0(28)
+; AIX32-NEXT: lwz 4, 80(1)
+; AIX32-NEXT: lwz 5, 84(1)
+; AIX32-NEXT: lbz 7, 0(29)
+; AIX32-NEXT: lwz 31, L..C7(2) # @ull
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 3, 4(31)
-; AIX32-NEXT: lwz 3, 56(1)
-; AIX32-NEXT: extsb 6, 4
-; AIX32-NEXT: addi 4, 1, 56
+; AIX32-NEXT: stw 26, 104(1)
+; AIX32-NEXT: extsb 6, 3
+; AIX32-NEXT: stw 4, 64(1)
+; AIX32-NEXT: mr 3, 31
+; AIX32-NEXT: stw 5, 68(1)
+; AIX32-NEXT: stw 5, 4(30)
+; AIX32-NEXT: stw 4, 0(30)
+; AIX32-NEXT: stw 7, 108(1)
; AIX32-NEXT: srawi 5, 6, 31
-; AIX32-NEXT: stw 23, 56(1)
-; AIX32-NEXT: stw 3, 0(31)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 30
+; AIX32-NEXT: addi 4, 1, 104
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: stw 6, 100(1)
+; AIX32-NEXT: stw 5, 96(1)
; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 4, 60(1)
-; AIX32-NEXT: lwz 3, 56(1)
-; AIX32-NEXT: stw 4, 4(30)
+; AIX32-NEXT: lwz 4, 108(1)
+; AIX32-NEXT: lwz 3, 104(1)
+; AIX32-NEXT: stw 4, 92(1)
+; AIX32-NEXT: stw 4, 4(31)
; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 3, 0(30)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: sync
; AIX32-NEXT: slw 5, 4, 21
+; AIX32-NEXT: stw 3, 88(1)
+; AIX32-NEXT: stw 3, 0(31)
+; AIX32-NEXT: lbz 3, 0(29)
; AIX32-NEXT: li 4, 255
; AIX32-NEXT: slw 6, 3, 21
+; AIX32-NEXT: sync
; AIX32-NEXT: slw 4, 4, 21
; AIX32-NEXT: and 5, 5, 4
; AIX32-NEXT: and 6, 6, 4
@@ -4870,14 +4926,14 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 5, 0(28)
; AIX32-NEXT: cmpw 4, 3
; AIX32-NEXT: li 3, 1
-; AIX32-NEXT: iseleq 4, 3, 23
-; AIX32-NEXT: slw 6, 5, 25
+; AIX32-NEXT: iseleq 4, 3, 26
+; AIX32-NEXT: slw 6, 5, 24
; AIX32-NEXT: li 5, 255
; AIX32-NEXT: stw 4, 0(27)
; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: slw 5, 5, 25
+; AIX32-NEXT: slw 5, 5, 24
; AIX32-NEXT: sync
-; AIX32-NEXT: slw 7, 4, 25
+; AIX32-NEXT: slw 7, 4, 24
; AIX32-NEXT: and 6, 6, 5
; AIX32-NEXT: and 7, 7, 5
; AIX32-NEXT: L..BB3_22: # %entry
@@ -4893,20 +4949,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 9, 0, 20
; AIX32-NEXT: bne 0, L..BB3_22
; AIX32-NEXT: L..BB3_24: # %entry
-; AIX32-NEXT: srw 5, 8, 25
+; AIX32-NEXT: srw 5, 8, 24
; AIX32-NEXT: lwsync
; AIX32-NEXT: cmpw 5, 4
; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: iseleq 4, 3, 23
+; AIX32-NEXT: iseleq 4, 3, 26
; AIX32-NEXT: extsb 5, 5
; AIX32-NEXT: stw 4, 0(27)
; AIX32-NEXT: lbz 4, 0(29)
; AIX32-NEXT: sync
-; AIX32-NEXT: slw 6, 5, 26
+; AIX32-NEXT: slw 6, 5, 25
; AIX32-NEXT: li 5, 0
-; AIX32-NEXT: slw 7, 4, 26
+; AIX32-NEXT: slw 7, 4, 25
; AIX32-NEXT: ori 5, 5, 65535
-; AIX32-NEXT: slw 5, 5, 26
+; AIX32-NEXT: slw 5, 5, 25
; AIX32-NEXT: and 6, 6, 5
; AIX32-NEXT: and 7, 7, 5
; AIX32-NEXT: L..BB3_25: # %entry
@@ -4922,20 +4978,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 9, 0, 22
; AIX32-NEXT: bne 0, L..BB3_25
; AIX32-NEXT: L..BB3_27: # %entry
-; AIX32-NEXT: srw 5, 8, 26
+; AIX32-NEXT: srw 5, 8, 25
; AIX32-NEXT: lwsync
; AIX32-NEXT: cmpw 5, 4
; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: iseleq 4, 3, 23
+; AIX32-NEXT: iseleq 4, 3, 26
; AIX32-NEXT: extsb 5, 5
; AIX32-NEXT: stw 4, 0(27)
; AIX32-NEXT: lbz 4, 0(29)
; AIX32-NEXT: sync
-; AIX32-NEXT: slw 6, 5, 24
+; AIX32-NEXT: slw 6, 5, 23
; AIX32-NEXT: li 5, 0
-; AIX32-NEXT: slw 7, 4, 24
+; AIX32-NEXT: slw 7, 4, 23
; AIX32-NEXT: ori 5, 5, 65535
-; AIX32-NEXT: slw 5, 5, 24
+; AIX32-NEXT: slw 5, 5, 23
; AIX32-NEXT: and 6, 6, 5
; AIX32-NEXT: and 7, 7, 5
; AIX32-NEXT: L..BB3_28: # %entry
@@ -4951,11 +5007,11 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: stwcx. 9, 0, 19
; AIX32-NEXT: bne 0, L..BB3_28
; AIX32-NEXT: L..BB3_30: # %entry
-; AIX32-NEXT: srw 5, 8, 24
+; AIX32-NEXT: srw 5, 8, 23
; AIX32-NEXT: lwsync
; AIX32-NEXT: cmpw 5, 4
; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: iseleq 4, 3, 23
+; AIX32-NEXT: iseleq 4, 3, 26
; AIX32-NEXT: stw 4, 0(27)
; AIX32-NEXT: lbz 4, 0(29)
; AIX32-NEXT: sync
@@ -4971,7 +5027,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: bne 0, L..BB3_31
; AIX32-NEXT: L..BB3_33: # %entry
; AIX32-NEXT: lwsync
-; AIX32-NEXT: isel 4, 3, 23, 6
+; AIX32-NEXT: isel 4, 3, 26, 6
; AIX32-NEXT: lbz 5, 0(28)
; AIX32-NEXT: stw 4, 0(27)
; AIX32-NEXT: lbz 4, 0(29)
@@ -4988,50 +5044,68 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: bne 0, L..BB3_34
; AIX32-NEXT: L..BB3_36: # %entry
; AIX32-NEXT: lwsync
-; AIX32-NEXT: isel 3, 3, 23, 6
+; AIX32-NEXT: isel 3, 3, 26, 6
; AIX32-NEXT: li 7, 5
; AIX32-NEXT: li 8, 5
; AIX32-NEXT: lbz 4, 0(28)
; AIX32-NEXT: stw 3, 0(27)
; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: stw 23, 56(1)
+; AIX32-NEXT: stw 26, 128(1)
; AIX32-NEXT: extsb 6, 4
-; AIX32-NEXT: addi 4, 1, 56
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 31
+; AIX32-NEXT: addi 4, 1, 128
+; AIX32-NEXT: stw 3, 132(1)
+; AIX32-NEXT: mr 3, 30
; AIX32-NEXT: srawi 5, 6, 31
+; AIX32-NEXT: stw 6, 124(1)
+; AIX32-NEXT: stw 5, 120(1)
; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
; AIX32-NEXT: nop
+; AIX32-NEXT: clrlwi 3, 3, 24
; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 3, 0(27)
-; AIX32-NEXT: lbz 3, 0(29)
+; AIX32-NEXT: lwz 5, 128(1)
; AIX32-NEXT: li 7, 5
; AIX32-NEXT: li 8, 5
+; AIX32-NEXT: stw 26, 152(1)
+; AIX32-NEXT: cntlzw 3, 3
+; AIX32-NEXT: stw 5, 112(1)
+; AIX32-NEXT: lwz 5, 132(1)
; AIX32-NEXT: extsb 6, 4
-; AIX32-NEXT: addi 4, 1, 56
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 30
-; AIX32-NEXT: stw 23, 56(1)
+; AIX32-NEXT: lbz 4, 0(29)
+; AIX32-NEXT: rlwinm 3, 3, 27, 31, 31
+; AIX32-NEXT: stw 6, 148(1)
+; AIX32-NEXT: stw 5, 116(1)
+; AIX32-NEXT: stw 4, 156(1)
; AIX32-NEXT: srawi 5, 6, 31
+; AIX32-NEXT: addi 4, 1, 152
+; AIX32-NEXT: stw 3, 0(27)
+; AIX32-NEXT: mr 3, 31
+; AIX32-NEXT: stw 5, 144(1)
; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
; AIX32-NEXT: nop
+; AIX32-NEXT: clrlwi 3, 3, 24
+; AIX32-NEXT: lwz 4, 152(1)
+; AIX32-NEXT: lwz 31, 220(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 30, 216(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 212(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 208(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 200(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 25, 196(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 24, 192(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 23, 188(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 22, 184(1) # 4-byte Folded Reload
+; AIX32-NEXT: cntlzw 3, 3
+; AIX32-NEXT: lwz 21, 180(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 20, 176(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 19, 172(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 18, 168(1) # 4-byte Folded Reload
+; AIX32-NEXT: stw 4, 136(1)
+; AIX32-NEXT: lwz 4, 156(1)
+; AIX32-NEXT: lwz 17, 164(1) # 4-byte Folded Reload
+; AIX32-NEXT: rlwinm 3, 3, 27, 31, 31
+; AIX32-NEXT: stw 4, 140(1)
; AIX32-NEXT: stw 3, 0(27)
-; AIX32-NEXT: lwz 31, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 30, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 27, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 26, 104(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 25, 100(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 24, 96(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 23, 92(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 22, 88(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 21, 84(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 20, 80(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 19, 76(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 18, 72(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 17, 68(1) # 4-byte Folded Reload
-; AIX32-NEXT: addi 1, 1, 128
+; AIX32-NEXT: lwz 27, 204(1) # 4-byte Folded Reload
+; AIX32-NEXT: addi 1, 1, 224
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
@@ -5616,25 +5690,31 @@ define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone
; AIX32-LABEL: cmpswplp:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -64(1)
+; AIX32-NEXT: stwu 1, -80(1)
; AIX32-NEXT: addic 7, 6, 1
-; AIX32-NEXT: stw 0, 72(1)
+; AIX32-NEXT: stw 0, 88(1)
; AIX32-NEXT: addze 8, 5
-; AIX32-NEXT: stw 6, 60(1)
-; AIX32-NEXT: stw 5, 56(1)
-; AIX32-NEXT: addi 4, 1, 56
+; AIX32-NEXT: stw 6, 76(1)
+; AIX32-NEXT: stw 5, 72(1)
+; AIX32-NEXT: stw 7, 68(1)
+; AIX32-NEXT: stw 8, 64(1)
+; AIX32-NEXT: addi 4, 1, 72
; AIX32-NEXT: mr 5, 8
; AIX32-NEXT: mr 6, 7
; AIX32-NEXT: li 7, 0
; AIX32-NEXT: li 8, 0
; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: andi. 3, 3, 1
-; AIX32-NEXT: li 3, 66
-; AIX32-NEXT: li 4, 55
-; AIX32-NEXT: iselgt 4, 4, 3
+; AIX32-NEXT: andi. 3, 3, 255
+; AIX32-NEXT: lwz 3, 72(1)
+; AIX32-NEXT: li 4, 66
+; AIX32-NEXT: stw 3, 56(1)
+; AIX32-NEXT: lwz 3, 76(1)
+; AIX32-NEXT: stw 3, 60(1)
+; AIX32-NEXT: li 3, 55
+; AIX32-NEXT: iseleq 4, 3, 4
; AIX32-NEXT: li 3, 0
-; AIX32-NEXT: addi 1, 1, 64
+; AIX32-NEXT: addi 1, 1, 80
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
@@ -5669,57 +5749,67 @@ define dso_local i64 @atommax8(ptr nocapture noundef %ptr, i64 noundef %val) loc
; AIX32-LABEL: atommax8:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -80(1)
-; AIX32-NEXT: stw 0, 88(1)
-; AIX32-NEXT: stw 30, 72(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 76(1) # 4-byte Folded Spill
-; AIX32-NEXT: mr 31, 5
+; AIX32-NEXT: stwu 1, -112(1)
+; AIX32-NEXT: stw 0, 120(1)
+; AIX32-NEXT: stw 29, 100(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 30, 104(1) # 4-byte Folded Spill
; AIX32-NEXT: mr 30, 4
-; AIX32-NEXT: lwz 4, 4(3)
-; AIX32-NEXT: lwz 5, 0(3)
-; AIX32-NEXT: stw 28, 64(1) # 4-byte Folded Spill
-; AIX32-NEXT: addi 28, 1, 56
-; AIX32-NEXT: stw 29, 68(1) # 4-byte Folded Spill
; AIX32-NEXT: mr 29, 3
+; AIX32-NEXT: lwz 4, 4(3)
+; AIX32-NEXT: lwz 3, 0(3)
+; AIX32-NEXT: stw 28, 96(1) # 4-byte Folded Spill
+; AIX32-NEXT: addi 28, 1, 80
+; AIX32-NEXT: stw 31, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT: mr 31, 5
+; AIX32-NEXT: stw 26, 88(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 27, 92(1) # 4-byte Folded Spill
; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB7_1: # %atomicrmw.start
; AIX32-NEXT: #
-; AIX32-NEXT: cmplw 5, 30
-; AIX32-NEXT: cmpw 1, 5, 30
+; AIX32-NEXT: cmplw 3, 30
+; AIX32-NEXT: cmpw 1, 3, 30
; AIX32-NEXT: li 7, 5
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 5, 56(1)
-; AIX32-NEXT: mr 3, 29
+; AIX32-NEXT: stw 3, 80(1)
; AIX32-NEXT: crandc 20, 5, 2
; AIX32-NEXT: cmplw 1, 4, 31
; AIX32-NEXT: crand 21, 2, 5
-; AIX32-NEXT: stw 4, 60(1)
+; AIX32-NEXT: stw 4, 84(1)
; AIX32-NEXT: cror 20, 21, 20
-; AIX32-NEXT: isel 5, 5, 30, 20
-; AIX32-NEXT: isel 6, 4, 31, 20
+; AIX32-NEXT: isel 27, 4, 31, 20
+; AIX32-NEXT: isel 26, 3, 30, 20
+; AIX32-NEXT: mr 3, 29
; AIX32-NEXT: mr 4, 28
+; AIX32-NEXT: mr 5, 26
+; AIX32-NEXT: mr 6, 27
; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 4, 60(1)
-; AIX32-NEXT: lwz 5, 56(1)
-; AIX32-NEXT: cmplwi 3, 0
-; AIX32-NEXT: beq 0, L..BB7_1
+; AIX32-NEXT: lwz 4, 84(1)
+; AIX32-NEXT: andi. 3, 3, 255
+; AIX32-NEXT: lwz 3, 80(1)
+; AIX32-NEXT: stw 4, 68(1)
+; AIX32-NEXT: stw 3, 64(1)
+; AIX32-NEXT: bne 0, L..BB7_1
; AIX32-NEXT: # %bb.2: # %atomicrmw.end
-; AIX32-NEXT: cmplw 5, 30
-; AIX32-NEXT: cmpw 1, 5, 30
+; AIX32-NEXT: cmplw 3, 30
+; AIX32-NEXT: cmpw 1, 3, 30
; AIX32-NEXT: li 3, 55
-; AIX32-NEXT: lwz 30, 72(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 68(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 64(1) # 4-byte Folded Reload
+; AIX32-NEXT: stw 26, 72(1)
+; AIX32-NEXT: stw 27, 76(1)
; AIX32-NEXT: crandc 20, 5, 2
; AIX32-NEXT: cmplw 1, 4, 31
; AIX32-NEXT: li 4, 66
-; AIX32-NEXT: lwz 31, 76(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 31, 108(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 30, 104(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 100(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 96(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 27, 92(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 88(1) # 4-byte Folded Reload
; AIX32-NEXT: crand 21, 2, 5
; AIX32-NEXT: cror 20, 21, 20
; AIX32-NEXT: isel 4, 4, 3, 20
; AIX32-NEXT: li 3, 0
-; AIX32-NEXT: addi 1, 1, 80
+; AIX32-NEXT: addi 1, 1, 112
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 0d231769ac505c..3f28334232e636 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -184,10 +184,11 @@ define i128 @add(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: add:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -112(r1)
+; PPC-PWR8-NEXT: stw r0, 116(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -195,63 +196,67 @@ define i128 @add(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 40
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r29, r7
-; PPC-PWR8-NEXT: addi r24, r1, 16
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 56
+; PPC-PWR8-NEXT: addi r23, r1, 16
+; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB1_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: addc r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 36(r1)
-; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: stw r4, 60(r1)
+; PPC-PWR8-NEXT: stw r3, 56(r1)
; PPC-PWR8-NEXT: adde r8, r5, r29
-; PPC-PWR8-NEXT: stw r5, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: stw r5, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 68(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: adde r4, r4, r28
-; PPC-PWR8-NEXT: stw r7, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 52(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: adde r3, r3, r27
-; PPC-PWR8-NEXT: stw r8, 24(r1)
+; PPC-PWR8-NEXT: stw r8, 48(r1)
; PPC-PWR8-NEXT: li r8, 5
-; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r4, 44(r1)
; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 40(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: mr r7, r3
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: cmplwi r7, 0
-; PPC-PWR8-NEXT: beq cr0, .LBB1_1
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
+; PPC-PWR8-NEXT: andi. r3, r3, 255
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: bne cr0, .LBB1_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 116(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 112
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -326,10 +331,11 @@ define i128 @sub(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: sub:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -112(r1)
+; PPC-PWR8-NEXT: stw r0, 116(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -337,63 +343,67 @@ define i128 @sub(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 40
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r29, r7
-; PPC-PWR8-NEXT: addi r24, r1, 16
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 56
+; PPC-PWR8-NEXT: addi r23, r1, 16
+; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB2_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: subc r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 36(r1)
-; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: stw r4, 60(r1)
+; PPC-PWR8-NEXT: stw r3, 56(r1)
; PPC-PWR8-NEXT: subfe r8, r29, r5
-; PPC-PWR8-NEXT: stw r5, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: stw r5, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 68(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: subfe r4, r28, r4
-; PPC-PWR8-NEXT: stw r7, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 52(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: subfe r3, r27, r3
-; PPC-PWR8-NEXT: stw r8, 24(r1)
+; PPC-PWR8-NEXT: stw r8, 48(r1)
; PPC-PWR8-NEXT: li r8, 5
-; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r4, 44(r1)
; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 40(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: mr r7, r3
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: cmplwi r7, 0
-; PPC-PWR8-NEXT: beq cr0, .LBB2_1
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
+; PPC-PWR8-NEXT: andi. r3, r3, 255
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: bne cr0, .LBB2_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 116(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 112
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -468,10 +478,11 @@ define i128 @and(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: and:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -112(r1)
+; PPC-PWR8-NEXT: stw r0, 116(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -479,63 +490,67 @@ define i128 @and(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 40
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r29, r7
-; PPC-PWR8-NEXT: addi r24, r1, 16
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 56
+; PPC-PWR8-NEXT: addi r23, r1, 16
+; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB3_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 32(r1)
-; PPC-PWR8-NEXT: and r3, r3, r27
-; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 56(r1)
+; PPC-PWR8-NEXT: and r7, r6, r30
+; PPC-PWR8-NEXT: stw r4, 60(r1)
+; PPC-PWR8-NEXT: and r8, r5, r29
; PPC-PWR8-NEXT: and r4, r4, r28
-; PPC-PWR8-NEXT: and r7, r5, r29
-; PPC-PWR8-NEXT: stw r5, 40(r1)
-; PPC-PWR8-NEXT: and r5, r6, r30
-; PPC-PWR8-NEXT: stw r6, 44(r1)
-; PPC-PWR8-NEXT: stw r5, 28(r1)
-; PPC-PWR8-NEXT: stw r7, 24(r1)
-; PPC-PWR8-NEXT: mr r5, r25
-; PPC-PWR8-NEXT: li r7, 5
-; PPC-PWR8-NEXT: stw r4, 20(r1)
-; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: and r3, r3, r27
+; PPC-PWR8-NEXT: stw r5, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 68(r1)
+; PPC-PWR8-NEXT: stw r3, 40(r1)
+; PPC-PWR8-NEXT: stw r4, 44(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: stw r8, 48(r1)
+; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: mr r7, r3
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: cmplwi r7, 0
-; PPC-PWR8-NEXT: beq cr0, .LBB3_1
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
+; PPC-PWR8-NEXT: andi. r3, r3, 255
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: bne cr0, .LBB3_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 116(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 112
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -610,10 +625,11 @@ define i128 @or(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: or:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -112(r1)
+; PPC-PWR8-NEXT: stw r0, 116(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -621,63 +637,67 @@ define i128 @or(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 40
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r29, r7
-; PPC-PWR8-NEXT: addi r24, r1, 16
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 56
+; PPC-PWR8-NEXT: addi r23, r1, 16
+; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB4_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 32(r1)
-; PPC-PWR8-NEXT: or r3, r3, r27
-; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 56(r1)
+; PPC-PWR8-NEXT: or r7, r6, r30
+; PPC-PWR8-NEXT: stw r4, 60(r1)
+; PPC-PWR8-NEXT: or r8, r5, r29
; PPC-PWR8-NEXT: or r4, r4, r28
-; PPC-PWR8-NEXT: or r7, r5, r29
-; PPC-PWR8-NEXT: stw r5, 40(r1)
-; PPC-PWR8-NEXT: or r5, r6, r30
-; PPC-PWR8-NEXT: stw r6, 44(r1)
-; PPC-PWR8-NEXT: stw r5, 28(r1)
-; PPC-PWR8-NEXT: stw r7, 24(r1)
-; PPC-PWR8-NEXT: mr r5, r25
-; PPC-PWR8-NEXT: li r7, 5
-; PPC-PWR8-NEXT: stw r4, 20(r1)
-; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: or r3, r3, r27
+; PPC-PWR8-NEXT: stw r5, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 68(r1)
+; PPC-PWR8-NEXT: stw r3, 40(r1)
+; PPC-PWR8-NEXT: stw r4, 44(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: stw r8, 48(r1)
+; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: mr r7, r3
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: cmplwi r7, 0
-; PPC-PWR8-NEXT: beq cr0, .LBB4_1
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
+; PPC-PWR8-NEXT: andi. r3, r3, 255
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: bne cr0, .LBB4_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 116(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 112
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -752,10 +772,11 @@ define i128 @xor(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: xor:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -112(r1)
+; PPC-PWR8-NEXT: stw r0, 116(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -763,63 +784,67 @@ define i128 @xor(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 40
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r29, r7
-; PPC-PWR8-NEXT: addi r24, r1, 16
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 56
+; PPC-PWR8-NEXT: addi r23, r1, 16
+; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB5_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 32(r1)
-; PPC-PWR8-NEXT: xor r3, r3, r27
-; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 56(r1)
+; PPC-PWR8-NEXT: xor r7, r6, r30
+; PPC-PWR8-NEXT: stw r4, 60(r1)
+; PPC-PWR8-NEXT: xor r8, r5, r29
; PPC-PWR8-NEXT: xor r4, r4, r28
-; PPC-PWR8-NEXT: xor r7, r5, r29
-; PPC-PWR8-NEXT: stw r5, 40(r1)
-; PPC-PWR8-NEXT: xor r5, r6, r30
-; PPC-PWR8-NEXT: stw r6, 44(r1)
-; PPC-PWR8-NEXT: stw r5, 28(r1)
-; PPC-PWR8-NEXT: stw r7, 24(r1)
-; PPC-PWR8-NEXT: mr r5, r25
-; PPC-PWR8-NEXT: li r7, 5
-; PPC-PWR8-NEXT: stw r4, 20(r1)
-; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: xor r3, r3, r27
+; PPC-PWR8-NEXT: stw r5, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 68(r1)
+; PPC-PWR8-NEXT: stw r3, 40(r1)
+; PPC-PWR8-NEXT: stw r4, 44(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: stw r8, 48(r1)
+; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: mr r7, r3
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: cmplwi r7, 0
-; PPC-PWR8-NEXT: beq cr0, .LBB5_1
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
+; PPC-PWR8-NEXT: andi. r3, r3, 255
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: bne cr0, .LBB5_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 116(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 112
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -894,10 +919,11 @@ define i128 @nand(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: nand:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -112(r1)
+; PPC-PWR8-NEXT: stw r0, 116(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -905,63 +931,67 @@ define i128 @nand(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 40
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r29, r7
-; PPC-PWR8-NEXT: addi r24, r1, 16
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 56
+; PPC-PWR8-NEXT: addi r23, r1, 16
+; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB6_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 32(r1)
-; PPC-PWR8-NEXT: nand r3, r3, r27
-; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 56(r1)
+; PPC-PWR8-NEXT: nand r7, r6, r30
+; PPC-PWR8-NEXT: stw r4, 60(r1)
+; PPC-PWR8-NEXT: nand r8, r5, r29
; PPC-PWR8-NEXT: nand r4, r4, r28
-; PPC-PWR8-NEXT: nand r7, r5, r29
-; PPC-PWR8-NEXT: stw r5, 40(r1)
-; PPC-PWR8-NEXT: nand r5, r6, r30
-; PPC-PWR8-NEXT: stw r6, 44(r1)
-; PPC-PWR8-NEXT: stw r5, 28(r1)
-; PPC-PWR8-NEXT: stw r7, 24(r1)
-; PPC-PWR8-NEXT: mr r5, r25
-; PPC-PWR8-NEXT: li r7, 5
-; PPC-PWR8-NEXT: stw r4, 20(r1)
-; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: nand r3, r3, r27
+; PPC-PWR8-NEXT: stw r5, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 68(r1)
+; PPC-PWR8-NEXT: stw r3, 40(r1)
+; PPC-PWR8-NEXT: stw r4, 44(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: stw r8, 48(r1)
+; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: mr r7, r3
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: cmplwi r7, 0
-; PPC-PWR8-NEXT: beq cr0, .LBB6_1
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
+; PPC-PWR8-NEXT: andi. r3, r3, 255
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: bne cr0, .LBB6_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 116(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 112
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -995,22 +1025,31 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_weak_acquire_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -128(r1)
-; PWR7-NEXT: std r0, 144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: stdu r1, -176(r1)
+; PWR7-NEXT: std r0, 192(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 176
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: std r5, 120(r1)
-; PWR7-NEXT: std r4, 112(r1)
-; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: .cfi_offset r30, -16
+; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; PWR7-NEXT: addi r30, r1, 144
+; PWR7-NEXT: std r5, 152(r1)
+; PWR7-NEXT: std r7, 136(r1)
+; PWR7-NEXT: std r6, 128(r1)
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 2
; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: std r4, 144(r1)
+; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: lxvd2x vs0, 0, r30
+; PWR7-NEXT: addi r3, r1, 112
+; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; PWR7-NEXT: stxvd2x vs0, 0, r3
; PWR7-NEXT: ld r3, 112(r1)
-; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: addi r1, r1, 176
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1060,33 +1099,40 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_weak_acquire_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -48(r1)
-; PPC-PWR8-NEXT: stw r0, 52(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 60(r1)
-; PPC-PWR8-NEXT: stw r8, 44(r1)
-; PPC-PWR8-NEXT: stw r7, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 36(r1)
-; PPC-PWR8-NEXT: stw r5, 32(r1)
-; PPC-PWR8-NEXT: addi r5, r1, 32
-; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: lwz r3, 92(r1)
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r30, r1, 56
+; PPC-PWR8-NEXT: stw r8, 68(r1)
+; PPC-PWR8-NEXT: stw r7, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 60(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 40
+; PPC-PWR8-NEXT: stw r5, 56(r1)
+; PPC-PWR8-NEXT: mr r5, r30
; PPC-PWR8-NEXT: li r7, 2
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r10, 20(r1)
-; PPC-PWR8-NEXT: stw r9, 16(r1)
-; PPC-PWR8-NEXT: stw r3, 28(r1)
-; PPC-PWR8-NEXT: lwz r3, 56(r1)
-; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: stw r3, 52(r1)
+; PPC-PWR8-NEXT: lwz r3, 88(r1)
+; PPC-PWR8-NEXT: stw r10, 44(r1)
+; PPC-PWR8-NEXT: stw r9, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 48(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: lwz r0, 52(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
+; PPC-PWR8-NEXT: addi r3, r1, 16
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1120,22 +1166,31 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_weak_release_monotonic:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -128(r1)
-; PWR7-NEXT: std r0, 144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: stdu r1, -176(r1)
+; PWR7-NEXT: std r0, 192(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 176
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: std r5, 120(r1)
-; PWR7-NEXT: std r4, 112(r1)
-; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: .cfi_offset r30, -16
+; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; PWR7-NEXT: addi r30, r1, 144
+; PWR7-NEXT: std r5, 152(r1)
+; PWR7-NEXT: std r7, 136(r1)
+; PWR7-NEXT: std r6, 128(r1)
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 3
; PWR7-NEXT: li r8, 0
+; PWR7-NEXT: std r4, 144(r1)
+; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: lxvd2x vs0, 0, r30
+; PWR7-NEXT: addi r3, r1, 112
+; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; PWR7-NEXT: stxvd2x vs0, 0, r3
; PWR7-NEXT: ld r3, 112(r1)
-; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: addi r1, r1, 176
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1185,33 +1240,40 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_weak_release_monotonic:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -48(r1)
-; PPC-PWR8-NEXT: stw r0, 52(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 60(r1)
-; PPC-PWR8-NEXT: stw r8, 44(r1)
-; PPC-PWR8-NEXT: stw r7, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 36(r1)
-; PPC-PWR8-NEXT: stw r5, 32(r1)
-; PPC-PWR8-NEXT: addi r5, r1, 32
-; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: lwz r3, 92(r1)
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r30, r1, 56
+; PPC-PWR8-NEXT: stw r8, 68(r1)
+; PPC-PWR8-NEXT: stw r7, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 60(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 40
+; PPC-PWR8-NEXT: stw r5, 56(r1)
+; PPC-PWR8-NEXT: mr r5, r30
; PPC-PWR8-NEXT: li r7, 3
; PPC-PWR8-NEXT: li r8, 0
-; PPC-PWR8-NEXT: stw r10, 20(r1)
-; PPC-PWR8-NEXT: stw r9, 16(r1)
-; PPC-PWR8-NEXT: stw r3, 28(r1)
-; PPC-PWR8-NEXT: lwz r3, 56(r1)
-; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: stw r3, 52(r1)
+; PPC-PWR8-NEXT: lwz r3, 88(r1)
+; PPC-PWR8-NEXT: stw r10, 44(r1)
+; PPC-PWR8-NEXT: stw r9, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 48(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: lwz r0, 52(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
+; PPC-PWR8-NEXT: addi r3, r1, 16
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1246,22 +1308,31 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_sc_sc:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -128(r1)
-; PWR7-NEXT: std r0, 144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: stdu r1, -176(r1)
+; PWR7-NEXT: std r0, 192(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 176
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: std r5, 120(r1)
-; PWR7-NEXT: std r4, 112(r1)
-; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: .cfi_offset r30, -16
+; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; PWR7-NEXT: addi r30, r1, 144
+; PWR7-NEXT: std r5, 152(r1)
+; PWR7-NEXT: std r7, 136(r1)
+; PWR7-NEXT: std r6, 128(r1)
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 5
; PWR7-NEXT: li r8, 5
+; PWR7-NEXT: std r4, 144(r1)
+; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: lxvd2x vs0, 0, r30
+; PWR7-NEXT: addi r3, r1, 112
+; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; PWR7-NEXT: stxvd2x vs0, 0, r3
; PWR7-NEXT: ld r3, 112(r1)
-; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: addi r1, r1, 176
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1313,33 +1384,40 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_sc_sc:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -48(r1)
-; PPC-PWR8-NEXT: stw r0, 52(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 60(r1)
-; PPC-PWR8-NEXT: stw r8, 44(r1)
-; PPC-PWR8-NEXT: stw r7, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 36(r1)
-; PPC-PWR8-NEXT: stw r5, 32(r1)
-; PPC-PWR8-NEXT: addi r5, r1, 32
-; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: lwz r3, 92(r1)
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r30, r1, 56
+; PPC-PWR8-NEXT: stw r8, 68(r1)
+; PPC-PWR8-NEXT: stw r7, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 60(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 40
+; PPC-PWR8-NEXT: stw r5, 56(r1)
+; PPC-PWR8-NEXT: mr r5, r30
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
-; PPC-PWR8-NEXT: stw r10, 20(r1)
-; PPC-PWR8-NEXT: stw r9, 16(r1)
-; PPC-PWR8-NEXT: stw r3, 28(r1)
-; PPC-PWR8-NEXT: lwz r3, 56(r1)
-; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: stw r3, 52(r1)
+; PPC-PWR8-NEXT: lwz r3, 88(r1)
+; PPC-PWR8-NEXT: stw r10, 44(r1)
+; PPC-PWR8-NEXT: stw r9, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 48(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: lwz r0, 52(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
+; PPC-PWR8-NEXT: addi r3, r1, 16
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1374,22 +1452,31 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_acqrel_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -128(r1)
-; PWR7-NEXT: std r0, 144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: stdu r1, -176(r1)
+; PWR7-NEXT: std r0, 192(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 176
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: std r5, 120(r1)
-; PWR7-NEXT: std r4, 112(r1)
-; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: .cfi_offset r30, -16
+; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; PWR7-NEXT: addi r30, r1, 144
+; PWR7-NEXT: std r5, 152(r1)
+; PWR7-NEXT: std r7, 136(r1)
+; PWR7-NEXT: std r6, 128(r1)
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 4
; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: std r4, 144(r1)
+; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: lxvd2x vs0, 0, r30
+; PWR7-NEXT: addi r3, r1, 112
+; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; PWR7-NEXT: stxvd2x vs0, 0, r3
; PWR7-NEXT: ld r3, 112(r1)
-; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: addi r1, r1, 176
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1441,33 +1528,40 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_acqrel_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -48(r1)
-; PPC-PWR8-NEXT: stw r0, 52(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 60(r1)
-; PPC-PWR8-NEXT: stw r8, 44(r1)
-; PPC-PWR8-NEXT: stw r7, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 36(r1)
-; PPC-PWR8-NEXT: stw r5, 32(r1)
-; PPC-PWR8-NEXT: addi r5, r1, 32
-; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: lwz r3, 92(r1)
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r30, r1, 56
+; PPC-PWR8-NEXT: stw r8, 68(r1)
+; PPC-PWR8-NEXT: stw r7, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 60(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 40
+; PPC-PWR8-NEXT: stw r5, 56(r1)
+; PPC-PWR8-NEXT: mr r5, r30
; PPC-PWR8-NEXT: li r7, 4
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r10, 20(r1)
-; PPC-PWR8-NEXT: stw r9, 16(r1)
-; PPC-PWR8-NEXT: stw r3, 28(r1)
-; PPC-PWR8-NEXT: lwz r3, 56(r1)
-; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: stw r3, 52(r1)
+; PPC-PWR8-NEXT: lwz r3, 88(r1)
+; PPC-PWR8-NEXT: stw r10, 44(r1)
+; PPC-PWR8-NEXT: stw r9, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 48(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lwz r6, 44(r1)
-; PPC-PWR8-NEXT: lwz r5, 40(r1)
-; PPC-PWR8-NEXT: lwz r4, 36(r1)
-; PPC-PWR8-NEXT: lwz r3, 32(r1)
-; PPC-PWR8-NEXT: lwz r0, 52(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
+; PPC-PWR8-NEXT: addi r3, r1, 16
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1505,20 +1599,31 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_acqrel_acquire_check_succ:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -128(r1)
-; PWR7-NEXT: std r0, 144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: stdu r1, -176(r1)
+; PWR7-NEXT: std r0, 192(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 176
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: std r5, 120(r1)
-; PWR7-NEXT: std r4, 112(r1)
-; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: .cfi_offset r30, -16
+; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; PWR7-NEXT: addi r30, r1, 144
+; PWR7-NEXT: std r5, 152(r1)
+; PWR7-NEXT: std r7, 136(r1)
+; PWR7-NEXT: std r6, 128(r1)
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 4
; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: std r4, 144(r1)
+; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: lxvd2x vs0, 0, r30
+; PWR7-NEXT: cntlzw r3, r3
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; PWR7-NEXT: stxvd2x vs0, 0, r4
+; PWR7-NEXT: rlwinm r3, r3, 27, 31, 31
+; PWR7-NEXT: addi r1, r1, 176
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1576,29 +1681,39 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -48(r1)
-; PPC-PWR8-NEXT: stw r0, 52(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 60(r1)
-; PPC-PWR8-NEXT: stw r8, 44(r1)
-; PPC-PWR8-NEXT: stw r7, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 36(r1)
-; PPC-PWR8-NEXT: stw r5, 32(r1)
-; PPC-PWR8-NEXT: addi r5, r1, 32
-; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: lwz r3, 92(r1)
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r30, r1, 56
+; PPC-PWR8-NEXT: stw r8, 68(r1)
+; PPC-PWR8-NEXT: stw r7, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 60(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 40
+; PPC-PWR8-NEXT: stw r5, 56(r1)
+; PPC-PWR8-NEXT: mr r5, r30
; PPC-PWR8-NEXT: li r7, 4
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r10, 20(r1)
-; PPC-PWR8-NEXT: stw r9, 16(r1)
-; PPC-PWR8-NEXT: stw r3, 28(r1)
-; PPC-PWR8-NEXT: lwz r3, 56(r1)
-; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: stw r3, 52(r1)
+; PPC-PWR8-NEXT: lwz r3, 88(r1)
+; PPC-PWR8-NEXT: stw r10, 44(r1)
+; PPC-PWR8-NEXT: stw r9, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 48(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lwz r0, 52(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
+; PPC-PWR8-NEXT: clrlwi r3, r3, 24
+; PPC-PWR8-NEXT: addi r4, r1, 16
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: cntlzw r3, r3
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r4
+; PPC-PWR8-NEXT: rlwinm r3, r3, 27, 31, 31
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1636,20 +1751,31 @@ define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: bool_cas_weak_acquire_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -128(r1)
-; PWR7-NEXT: std r0, 144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: stdu r1, -176(r1)
+; PWR7-NEXT: std r0, 192(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 176
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: std r5, 120(r1)
-; PWR7-NEXT: std r4, 112(r1)
-; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: .cfi_offset r30, -16
+; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; PWR7-NEXT: addi r30, r1, 144
+; PWR7-NEXT: std r5, 152(r1)
+; PWR7-NEXT: std r7, 136(r1)
+; PWR7-NEXT: std r6, 128(r1)
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 2
; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: std r4, 144(r1)
+; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: lxvd2x vs0, 0, r30
+; PWR7-NEXT: cntlzw r3, r3
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; PWR7-NEXT: stxvd2x vs0, 0, r4
+; PWR7-NEXT: rlwinm r3, r3, 27, 31, 31
+; PWR7-NEXT: addi r1, r1, 176
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1705,29 +1831,39 @@ define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -48(r1)
-; PPC-PWR8-NEXT: stw r0, 52(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 60(r1)
-; PPC-PWR8-NEXT: stw r8, 44(r1)
-; PPC-PWR8-NEXT: stw r7, 40(r1)
-; PPC-PWR8-NEXT: stw r6, 36(r1)
-; PPC-PWR8-NEXT: stw r5, 32(r1)
-; PPC-PWR8-NEXT: addi r5, r1, 32
-; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: lwz r3, 92(r1)
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r30, r1, 56
+; PPC-PWR8-NEXT: stw r8, 68(r1)
+; PPC-PWR8-NEXT: stw r7, 64(r1)
+; PPC-PWR8-NEXT: stw r6, 60(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 40
+; PPC-PWR8-NEXT: stw r5, 56(r1)
+; PPC-PWR8-NEXT: mr r5, r30
; PPC-PWR8-NEXT: li r7, 2
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r10, 20(r1)
-; PPC-PWR8-NEXT: stw r9, 16(r1)
-; PPC-PWR8-NEXT: stw r3, 28(r1)
-; PPC-PWR8-NEXT: lwz r3, 56(r1)
-; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: stw r3, 52(r1)
+; PPC-PWR8-NEXT: lwz r3, 88(r1)
+; PPC-PWR8-NEXT: stw r10, 44(r1)
+; PPC-PWR8-NEXT: stw r9, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 48(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lwz r0, 52(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
+; PPC-PWR8-NEXT: clrlwi r3, r3, 24
+; PPC-PWR8-NEXT: addi r4, r1, 16
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: cntlzw r3, r3
+; PPC-PWR8-NEXT: stxvw4x vs0, 0, r4
+; PPC-PWR8-NEXT: rlwinm r3, r3, 27, 31, 31
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index ff5bec53acd257..52495c461e8244 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -282,23 +282,28 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-LABEL: cas_weak_i64_release_monotonic:
; PPC32: # %bb.0:
; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -16(r1)
-; PPC32-NEXT: stw r0, 20(r1)
-; PPC32-NEXT: .cfi_def_cfa_offset 16
+; PPC32-NEXT: stwu r1, -32(r1)
+; PPC32-NEXT: stw r0, 36(r1)
+; PPC32-NEXT: .cfi_def_cfa_offset 32
; PPC32-NEXT: .cfi_offset lr, 4
; PPC32-NEXT: li r4, 0
-; PPC32-NEXT: stw r4, 12(r1)
-; PPC32-NEXT: li r5, 0
-; PPC32-NEXT: stw r4, 8(r1)
-; PPC32-NEXT: addi r4, r1, 8
+; PPC32-NEXT: li r5, 1
+; PPC32-NEXT: stw r4, 28(r1)
; PPC32-NEXT: li r6, 1
+; PPC32-NEXT: stw r4, 24(r1)
; PPC32-NEXT: li r7, 3
+; PPC32-NEXT: stw r5, 20(r1)
+; PPC32-NEXT: li r5, 0
+; PPC32-NEXT: stw r4, 16(r1)
+; PPC32-NEXT: addi r4, r1, 24
; PPC32-NEXT: li r8, 0
; PPC32-NEXT: bl __atomic_compare_exchange_8
-; PPC32-NEXT: lwz r4, 12(r1)
-; PPC32-NEXT: lwz r3, 8(r1)
-; PPC32-NEXT: lwz r0, 20(r1)
-; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: lwz r4, 28(r1)
+; PPC32-NEXT: lwz r3, 24(r1)
+; PPC32-NEXT: stw r4, 12(r1)
+; PPC32-NEXT: stw r3, 8(r1)
+; PPC32-NEXT: lwz r0, 36(r1)
+; PPC32-NEXT: addi r1, r1, 32
; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index c47db319fc2c3a..abbd210eaedc1f 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -30,10 +30,13 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -66,10 +69,15 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -157,10 +165,13 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -262,10 +273,15 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -381,10 +397,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -486,10 +505,15 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -605,10 +629,13 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -710,10 +737,15 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -829,10 +861,13 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -934,10 +969,15 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1053,10 +1093,13 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1158,10 +1201,15 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1277,10 +1325,13 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1382,10 +1433,15 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1501,10 +1557,13 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1537,10 +1596,15 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1633,10 +1697,13 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1669,10 +1736,15 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1765,10 +1837,13 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1801,10 +1876,15 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
+; RV64I-NEXT: slli a2, a2, 56
+; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1897,10 +1977,13 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1934,10 +2017,15 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2028,10 +2116,13 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2137,10 +2228,15 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2260,10 +2356,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2369,10 +2468,15 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2492,10 +2596,13 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2601,10 +2708,15 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2724,10 +2836,13 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2833,10 +2948,15 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2956,10 +3076,13 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3065,10 +3188,15 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3188,10 +3316,13 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3297,10 +3428,15 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3420,10 +3556,13 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3457,10 +3596,15 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3556,10 +3700,13 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3593,10 +3740,15 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3692,10 +3844,13 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3729,10 +3884,15 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
+; RV64I-NEXT: slli a2, a2, 48
+; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3828,10 +3988,13 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3865,15 +4028,19 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
@@ -3919,10 +4086,13 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3961,15 +4131,19 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_acquire_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic:
@@ -4025,10 +4199,13 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4067,15 +4244,19 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_acquire_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_acquire:
@@ -4131,10 +4312,13 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4173,15 +4357,19 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_release_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_release_monotonic:
@@ -4237,10 +4425,13 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4279,15 +4470,19 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_release_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_release_acquire:
@@ -4343,10 +4538,13 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4385,15 +4583,19 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_acq_rel_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic:
@@ -4449,10 +4651,13 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4491,15 +4696,19 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_acq_rel_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire:
@@ -4555,10 +4764,13 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4597,15 +4809,19 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
@@ -4661,10 +4877,13 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4703,15 +4922,19 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
@@ -4767,10 +4990,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4809,15 +5035,19 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: sext.w a2, a2
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
@@ -4870,47 +5100,62 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
@@ -4951,49 +5196,64 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acquire_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acquire_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic:
@@ -5044,49 +5304,64 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acquire_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acquire_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acquire_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_acquire:
@@ -5137,49 +5412,64 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_release_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_release_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_release_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_release_monotonic:
@@ -5230,49 +5520,64 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_release_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_release_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_release_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_release_acquire:
@@ -5323,49 +5628,64 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic:
@@ -5416,49 +5736,64 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire:
@@ -5509,49 +5844,64 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
@@ -5602,49 +5952,64 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
@@ -5695,49 +6060,64 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a2, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
+; RV32IA-NEXT: sw a2, 20(sp)
+; RV32IA-NEXT: sw a1, 16(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a3, 8(sp)
+; RV32IA-NEXT: addi a1, sp, 16
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: lw a0, 16(sp)
+; RV32IA-NEXT: lw a1, 20(sp)
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ld a0, 16(sp)
+; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
index 8d3fc96109262e..348eb8e0ade3f1 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
@@ -186,11 +186,11 @@ define void @amomax_w_discard(ptr %a, i32 %b) nounwind {
define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-LABEL: amomax_d_discard:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a4, 4(a0)
; RV32-NEXT: lw a5, 0(a0)
@@ -199,16 +199,21 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: j .LBB11_2
; RV32-NEXT: .LBB11_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw a5, 24(sp)
+; RV32-NEXT: sw a4, 28(sp)
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a2, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: bnez a0, .LBB11_6
+; RV32-NEXT: lw a4, 28(sp)
+; RV32-NEXT: lw a5, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a5, 8(sp)
+; RV32-NEXT: beqz a0, .LBB11_6
; RV32-NEXT: .LBB11_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a4, s1, .LBB11_4
@@ -230,11 +235,11 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: mv a3, s1
; RV32-NEXT: j .LBB11_1
; RV32-NEXT: .LBB11_6: # %atomicrmw.end
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: amomax_d_discard:
@@ -262,11 +267,11 @@ define void @amomaxu_w_discard(ptr %a, i32 %b) nounwind {
define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-LABEL: amomaxu_d_discard:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a4, 4(a0)
; RV32-NEXT: lw a5, 0(a0)
@@ -275,16 +280,21 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: j .LBB13_2
; RV32-NEXT: .LBB13_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw a5, 24(sp)
+; RV32-NEXT: sw a4, 28(sp)
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a2, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: bnez a0, .LBB13_6
+; RV32-NEXT: lw a4, 28(sp)
+; RV32-NEXT: lw a5, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a5, 8(sp)
+; RV32-NEXT: beqz a0, .LBB13_6
; RV32-NEXT: .LBB13_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a4, s1, .LBB13_4
@@ -306,11 +316,11 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: mv a3, s1
; RV32-NEXT: j .LBB13_1
; RV32-NEXT: .LBB13_6: # %atomicrmw.end
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: amomaxu_d_discard:
@@ -338,11 +348,11 @@ define void @amomin_w_discard(ptr %a, i32 %b) nounwind {
define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-LABEL: amomin_d_discard:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a4, 4(a0)
; RV32-NEXT: lw a5, 0(a0)
@@ -351,16 +361,21 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: j .LBB15_2
; RV32-NEXT: .LBB15_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw a5, 24(sp)
+; RV32-NEXT: sw a4, 28(sp)
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a2, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: bnez a0, .LBB15_6
+; RV32-NEXT: lw a4, 28(sp)
+; RV32-NEXT: lw a5, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a5, 8(sp)
+; RV32-NEXT: beqz a0, .LBB15_6
; RV32-NEXT: .LBB15_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a4, s1, .LBB15_4
@@ -382,11 +397,11 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: mv a3, s1
; RV32-NEXT: j .LBB15_1
; RV32-NEXT: .LBB15_6: # %atomicrmw.end
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: amomin_d_discard:
@@ -414,11 +429,11 @@ define void @amominu_w_discard(ptr %a, i32 %b) nounwind {
define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-LABEL: amominu_d_discard:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a4, 4(a0)
; RV32-NEXT: lw a5, 0(a0)
@@ -427,16 +442,21 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: j .LBB17_2
; RV32-NEXT: .LBB17_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw a5, 24(sp)
+; RV32-NEXT: sw a4, 28(sp)
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a2, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: bnez a0, .LBB17_6
+; RV32-NEXT: lw a4, 28(sp)
+; RV32-NEXT: lw a5, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a5, 8(sp)
+; RV32-NEXT: beqz a0, .LBB17_6
; RV32-NEXT: .LBB17_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a4, s1, .LBB17_4
@@ -458,11 +478,11 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: mv a3, s1
; RV32-NEXT: j .LBB17_1
; RV32-NEXT: .LBB17_6: # %atomicrmw.end
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: amominu_d_discard:
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 4223440b9cb888..5ef4964e8b8561 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -5359,13 +5359,16 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB45_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB45_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB45_4
; RV32I-NEXT: .LBB45_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -5422,33 +5425,37 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB45_2
; RV64I-NEXT: .LBB45_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB45_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB45_4
; RV64I-NEXT: .LBB45_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB45_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB45_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB45_1
; RV64I-NEXT: .LBB45_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -5544,13 +5551,16 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB46_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB46_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB46_4
; RV32I-NEXT: .LBB46_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -5636,33 +5646,37 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB46_2
; RV64I-NEXT: .LBB46_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB46_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB46_4
; RV64I-NEXT: .LBB46_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB46_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB46_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB46_1
; RV64I-NEXT: .LBB46_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -5816,13 +5830,16 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB47_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB47_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB47_4
; RV32I-NEXT: .LBB47_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -5908,33 +5925,37 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB47_2
; RV64I-NEXT: .LBB47_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB47_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB47_4
; RV64I-NEXT: .LBB47_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB47_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB47_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB47_1
; RV64I-NEXT: .LBB47_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -6088,13 +6109,16 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB48_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB48_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB48_4
; RV32I-NEXT: .LBB48_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -6180,33 +6204,37 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB48_2
; RV64I-NEXT: .LBB48_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB48_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB48_4
; RV64I-NEXT: .LBB48_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB48_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB48_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB48_1
; RV64I-NEXT: .LBB48_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -6360,13 +6388,16 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB49_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB49_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB49_4
; RV32I-NEXT: .LBB49_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -6423,33 +6454,37 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB49_2
; RV64I-NEXT: .LBB49_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB49_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB49_4
; RV64I-NEXT: .LBB49_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB49_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB49_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB49_1
; RV64I-NEXT: .LBB49_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -6545,13 +6580,16 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB50_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB50_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB50_4
; RV32I-NEXT: .LBB50_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -6608,33 +6646,37 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB50_2
; RV64I-NEXT: .LBB50_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB50_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB50_4
; RV64I-NEXT: .LBB50_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB50_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB50_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB50_1
; RV64I-NEXT: .LBB50_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -6730,13 +6772,16 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB51_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB51_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB51_4
; RV32I-NEXT: .LBB51_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -6822,33 +6867,37 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB51_2
; RV64I-NEXT: .LBB51_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB51_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB51_4
; RV64I-NEXT: .LBB51_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB51_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB51_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB51_1
; RV64I-NEXT: .LBB51_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -7002,13 +7051,16 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB52_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB52_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB52_4
; RV32I-NEXT: .LBB52_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -7094,33 +7146,37 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB52_2
; RV64I-NEXT: .LBB52_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB52_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB52_4
; RV64I-NEXT: .LBB52_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB52_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB52_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB52_1
; RV64I-NEXT: .LBB52_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -7274,13 +7330,16 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB53_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB53_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB53_4
; RV32I-NEXT: .LBB53_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -7366,33 +7425,37 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB53_2
; RV64I-NEXT: .LBB53_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB53_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB53_4
; RV64I-NEXT: .LBB53_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB53_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB53_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB53_1
; RV64I-NEXT: .LBB53_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -7546,13 +7609,16 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB54_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB54_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB54_4
; RV32I-NEXT: .LBB54_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -7609,33 +7675,37 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB54_2
; RV64I-NEXT: .LBB54_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB54_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB54_4
; RV64I-NEXT: .LBB54_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB54_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB54_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB54_1
; RV64I-NEXT: .LBB54_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -7730,13 +7800,16 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB55_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB55_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB55_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB55_4
; RV32I-NEXT: .LBB55_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -7787,31 +7860,35 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB55_2
; RV64I-NEXT: .LBB55_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB55_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB55_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB55_4
; RV64I-NEXT: .LBB55_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a0, .LBB55_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB55_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB55_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB55_1
; RV64I-NEXT: .LBB55_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -7896,13 +7973,16 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB56_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB56_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB56_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB56_4
; RV32I-NEXT: .LBB56_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -7977,31 +8057,35 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB56_2
; RV64I-NEXT: .LBB56_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB56_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB56_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB56_4
; RV64I-NEXT: .LBB56_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a0, .LBB56_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB56_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB56_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB56_1
; RV64I-NEXT: .LBB56_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -8134,13 +8218,16 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB57_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB57_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB57_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB57_4
; RV32I-NEXT: .LBB57_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -8215,31 +8302,35 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB57_2
; RV64I-NEXT: .LBB57_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB57_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB57_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB57_4
; RV64I-NEXT: .LBB57_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a0, .LBB57_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB57_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB57_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB57_1
; RV64I-NEXT: .LBB57_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -8372,13 +8463,16 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB58_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB58_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB58_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB58_4
; RV32I-NEXT: .LBB58_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -8453,31 +8547,35 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB58_2
; RV64I-NEXT: .LBB58_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB58_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB58_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB58_4
; RV64I-NEXT: .LBB58_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a0, .LBB58_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB58_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB58_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB58_1
; RV64I-NEXT: .LBB58_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -8610,13 +8708,16 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB59_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB59_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB59_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB59_4
; RV32I-NEXT: .LBB59_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -8667,31 +8768,35 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB59_2
; RV64I-NEXT: .LBB59_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB59_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB59_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB59_4
; RV64I-NEXT: .LBB59_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a0, .LBB59_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB59_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB59_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB59_1
; RV64I-NEXT: .LBB59_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -8776,13 +8881,16 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB60_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB60_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB60_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB60_4
; RV32I-NEXT: .LBB60_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -8833,31 +8941,35 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB60_2
; RV64I-NEXT: .LBB60_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB60_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB60_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB60_4
; RV64I-NEXT: .LBB60_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a0, .LBB60_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB60_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB60_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB60_1
; RV64I-NEXT: .LBB60_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -8942,13 +9054,16 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB61_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB61_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB61_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB61_4
; RV32I-NEXT: .LBB61_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -9023,31 +9138,35 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB61_2
; RV64I-NEXT: .LBB61_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB61_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB61_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB61_4
; RV64I-NEXT: .LBB61_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a0, .LBB61_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB61_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB61_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB61_1
; RV64I-NEXT: .LBB61_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -9180,13 +9299,16 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB62_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB62_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB62_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB62_4
; RV32I-NEXT: .LBB62_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -9261,31 +9383,35 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB62_2
; RV64I-NEXT: .LBB62_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB62_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB62_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB62_4
; RV64I-NEXT: .LBB62_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a0, .LBB62_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB62_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB62_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB62_1
; RV64I-NEXT: .LBB62_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -9418,13 +9544,16 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB63_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB63_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB63_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB63_4
; RV32I-NEXT: .LBB63_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -9499,31 +9628,35 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB63_2
; RV64I-NEXT: .LBB63_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB63_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB63_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB63_4
; RV64I-NEXT: .LBB63_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a0, .LBB63_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB63_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB63_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB63_1
; RV64I-NEXT: .LBB63_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -9656,13 +9789,16 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB64_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB64_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB64_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB64_4
; RV32I-NEXT: .LBB64_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -9713,31 +9849,35 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB64_2
; RV64I-NEXT: .LBB64_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB64_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB64_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB64_4
; RV64I-NEXT: .LBB64_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a0, .LBB64_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB64_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB64_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB64_1
; RV64I-NEXT: .LBB64_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -15386,13 +15526,16 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB110_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB110_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB110_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB110_4
; RV32I-NEXT: .LBB110_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -15451,33 +15594,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB110_2
; RV64I-NEXT: .LBB110_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB110_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB110_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB110_4
; RV64I-NEXT: .LBB110_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB110_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB110_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB110_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB110_1
; RV64I-NEXT: .LBB110_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -15577,13 +15724,16 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB111_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB111_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB111_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB111_4
; RV32I-NEXT: .LBB111_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -15673,33 +15823,37 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB111_2
; RV64I-NEXT: .LBB111_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB111_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB111_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB111_4
; RV64I-NEXT: .LBB111_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB111_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB111_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB111_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB111_1
; RV64I-NEXT: .LBB111_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -15861,13 +16015,16 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB112_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB112_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB112_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB112_4
; RV32I-NEXT: .LBB112_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -15957,33 +16114,37 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB112_2
; RV64I-NEXT: .LBB112_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB112_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB112_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB112_4
; RV64I-NEXT: .LBB112_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB112_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB112_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB112_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB112_1
; RV64I-NEXT: .LBB112_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -16145,13 +16306,16 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB113_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB113_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB113_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB113_4
; RV32I-NEXT: .LBB113_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -16241,33 +16405,37 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB113_2
; RV64I-NEXT: .LBB113_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB113_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB113_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB113_4
; RV64I-NEXT: .LBB113_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB113_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB113_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB113_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB113_1
; RV64I-NEXT: .LBB113_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -16429,13 +16597,16 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB114_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB114_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB114_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB114_4
; RV32I-NEXT: .LBB114_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -16494,33 +16665,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB114_2
; RV64I-NEXT: .LBB114_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB114_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB114_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB114_4
; RV64I-NEXT: .LBB114_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB114_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB114_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB114_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB114_1
; RV64I-NEXT: .LBB114_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -16620,13 +16795,16 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB115_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB115_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB115_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB115_4
; RV32I-NEXT: .LBB115_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -16685,33 +16863,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB115_2
; RV64I-NEXT: .LBB115_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB115_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB115_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB115_4
; RV64I-NEXT: .LBB115_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB115_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB115_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB115_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB115_1
; RV64I-NEXT: .LBB115_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -16811,13 +16993,16 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB116_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB116_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB116_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB116_4
; RV32I-NEXT: .LBB116_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -16907,33 +17092,37 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB116_2
; RV64I-NEXT: .LBB116_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB116_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB116_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB116_4
; RV64I-NEXT: .LBB116_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB116_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB116_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB116_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB116_1
; RV64I-NEXT: .LBB116_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -17095,13 +17284,16 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB117_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB117_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB117_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB117_4
; RV32I-NEXT: .LBB117_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -17191,33 +17383,37 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB117_2
; RV64I-NEXT: .LBB117_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB117_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB117_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB117_4
; RV64I-NEXT: .LBB117_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB117_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB117_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB117_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB117_1
; RV64I-NEXT: .LBB117_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -17379,13 +17575,16 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB118_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB118_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB118_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB118_4
; RV32I-NEXT: .LBB118_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -17475,33 +17674,37 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB118_2
; RV64I-NEXT: .LBB118_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB118_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB118_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB118_4
; RV64I-NEXT: .LBB118_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB118_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB118_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB118_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB118_1
; RV64I-NEXT: .LBB118_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -17663,13 +17866,16 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB119_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB119_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB119_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB119_4
; RV32I-NEXT: .LBB119_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -17728,33 +17934,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB119_2
; RV64I-NEXT: .LBB119_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB119_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB119_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB119_4
; RV64I-NEXT: .LBB119_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB119_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB119_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB119_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB119_1
; RV64I-NEXT: .LBB119_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -17856,13 +18066,16 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB120_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB120_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB120_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB120_4
; RV32I-NEXT: .LBB120_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -17925,21 +18138,25 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB120_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB120_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB120_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB120_4
; RV64I-NEXT: .LBB120_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bltu s3, a0, .LBB120_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bltu s3, a2, .LBB120_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB120_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB120_1
; RV64I-NEXT: .LBB120_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -18033,13 +18250,16 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB121_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB121_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB121_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB121_4
; RV32I-NEXT: .LBB121_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -18127,21 +18347,25 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB121_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB121_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB121_4
; RV64I-NEXT: .LBB121_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bltu s3, a0, .LBB121_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bltu s3, a2, .LBB121_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB121_1
; RV64I-NEXT: .LBB121_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -18285,13 +18509,16 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB122_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB122_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB122_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB122_4
; RV32I-NEXT: .LBB122_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -18379,21 +18606,25 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB122_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB122_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB122_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB122_4
; RV64I-NEXT: .LBB122_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bltu s3, a0, .LBB122_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bltu s3, a2, .LBB122_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB122_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB122_1
; RV64I-NEXT: .LBB122_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -18537,13 +18768,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB123_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB123_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB123_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB123_4
; RV32I-NEXT: .LBB123_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -18631,21 +18865,25 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB123_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB123_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB123_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB123_4
; RV64I-NEXT: .LBB123_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bltu s3, a0, .LBB123_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bltu s3, a2, .LBB123_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB123_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB123_1
; RV64I-NEXT: .LBB123_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -18789,13 +19027,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB124_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB124_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB124_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB124_4
; RV32I-NEXT: .LBB124_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -18858,21 +19099,25 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB124_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB124_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB124_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB124_4
; RV64I-NEXT: .LBB124_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bltu s3, a0, .LBB124_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bltu s3, a2, .LBB124_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB124_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB124_1
; RV64I-NEXT: .LBB124_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -18966,13 +19211,16 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB125_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB125_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB125_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB125_4
; RV32I-NEXT: .LBB125_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -19035,21 +19283,25 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB125_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB125_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB125_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB125_4
; RV64I-NEXT: .LBB125_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgeu s3, a0, .LBB125_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bgeu s3, a2, .LBB125_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB125_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB125_1
; RV64I-NEXT: .LBB125_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -19143,13 +19395,16 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB126_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB126_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB126_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB126_4
; RV32I-NEXT: .LBB126_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -19237,21 +19492,25 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB126_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB126_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB126_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB126_4
; RV64I-NEXT: .LBB126_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgeu s3, a0, .LBB126_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bgeu s3, a2, .LBB126_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB126_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB126_1
; RV64I-NEXT: .LBB126_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -19395,13 +19654,16 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB127_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB127_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB127_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB127_4
; RV32I-NEXT: .LBB127_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -19489,21 +19751,25 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB127_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB127_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB127_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB127_4
; RV64I-NEXT: .LBB127_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgeu s3, a0, .LBB127_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bgeu s3, a2, .LBB127_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB127_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB127_1
; RV64I-NEXT: .LBB127_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -19647,13 +19913,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB128_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB128_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB128_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB128_4
; RV32I-NEXT: .LBB128_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -19741,21 +20010,25 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB128_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB128_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB128_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB128_4
; RV64I-NEXT: .LBB128_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgeu s3, a0, .LBB128_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bgeu s3, a2, .LBB128_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB128_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB128_1
; RV64I-NEXT: .LBB128_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -19899,13 +20172,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB129_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB129_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB129_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB129_4
; RV32I-NEXT: .LBB129_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -19968,21 +20244,25 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB129_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB129_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB129_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB129_4
; RV64I-NEXT: .LBB129_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgeu s3, a0, .LBB129_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bgeu s3, a2, .LBB129_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB129_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB129_1
; RV64I-NEXT: .LBB129_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -22162,24 +22442,27 @@ define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB165_2
; RV32I-NEXT: .LBB165_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB165_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB165_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB165_4
; RV32I-NEXT: .LBB165_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22190,10 +22473,10 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB165_1
; RV32I-NEXT: .LBB165_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i32_monotonic:
@@ -22209,30 +22492,34 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB165_2
; RV64I-NEXT: .LBB165_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB165_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB165_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB165_4
; RV64I-NEXT: .LBB165_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a3, .LBB165_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB165_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB165_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB165_1
; RV64I-NEXT: .LBB165_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22251,24 +22538,27 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB166_2
; RV32I-NEXT: .LBB166_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB166_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB166_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB166_4
; RV32I-NEXT: .LBB166_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22279,10 +22569,10 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB166_1
; RV32I-NEXT: .LBB166_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_acquire:
@@ -22303,30 +22593,34 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB166_2
; RV64I-NEXT: .LBB166_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB166_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB166_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB166_4
; RV64I-NEXT: .LBB166_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a3, .LBB166_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB166_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB166_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB166_1
; RV64I-NEXT: .LBB166_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22350,24 +22644,27 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB167_2
; RV32I-NEXT: .LBB167_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB167_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB167_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB167_4
; RV32I-NEXT: .LBB167_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22378,10 +22675,10 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB167_1
; RV32I-NEXT: .LBB167_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_release:
@@ -22402,30 +22699,34 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB167_2
; RV64I-NEXT: .LBB167_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB167_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB167_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB167_4
; RV64I-NEXT: .LBB167_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a3, .LBB167_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB167_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB167_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB167_1
; RV64I-NEXT: .LBB167_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22449,24 +22750,27 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB168_2
; RV32I-NEXT: .LBB168_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB168_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB168_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB168_4
; RV32I-NEXT: .LBB168_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22477,10 +22781,10 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB168_1
; RV32I-NEXT: .LBB168_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_acq_rel:
@@ -22501,30 +22805,34 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB168_2
; RV64I-NEXT: .LBB168_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB168_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB168_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB168_4
; RV64I-NEXT: .LBB168_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a3, .LBB168_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB168_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB168_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB168_1
; RV64I-NEXT: .LBB168_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22548,24 +22856,27 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB169_2
; RV32I-NEXT: .LBB169_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB169_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB169_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB169_4
; RV32I-NEXT: .LBB169_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22576,10 +22887,10 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB169_1
; RV32I-NEXT: .LBB169_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_max_i32_seq_cst:
@@ -22600,30 +22911,34 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB169_2
; RV64I-NEXT: .LBB169_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB169_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB169_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB169_4
; RV64I-NEXT: .LBB169_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a3, .LBB169_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB169_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB169_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB169_1
; RV64I-NEXT: .LBB169_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22647,24 +22962,27 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB170_2
; RV32I-NEXT: .LBB170_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB170_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB170_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB170_4
; RV32I-NEXT: .LBB170_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22675,10 +22993,10 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB170_1
; RV32I-NEXT: .LBB170_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i32_monotonic:
@@ -22694,30 +23012,34 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB170_2
; RV64I-NEXT: .LBB170_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB170_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB170_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB170_4
; RV64I-NEXT: .LBB170_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a3, .LBB170_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB170_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB170_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB170_1
; RV64I-NEXT: .LBB170_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22736,24 +23058,27 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB171_2
; RV32I-NEXT: .LBB171_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB171_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB171_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB171_4
; RV32I-NEXT: .LBB171_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22764,10 +23089,10 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB171_1
; RV32I-NEXT: .LBB171_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_acquire:
@@ -22788,30 +23113,34 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB171_2
; RV64I-NEXT: .LBB171_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB171_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB171_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB171_4
; RV64I-NEXT: .LBB171_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a3, .LBB171_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB171_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB171_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB171_1
; RV64I-NEXT: .LBB171_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22835,24 +23164,27 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB172_2
; RV32I-NEXT: .LBB172_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB172_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB172_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB172_4
; RV32I-NEXT: .LBB172_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22863,10 +23195,10 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB172_1
; RV32I-NEXT: .LBB172_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_release:
@@ -22887,30 +23219,34 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB172_2
; RV64I-NEXT: .LBB172_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB172_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB172_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB172_4
; RV64I-NEXT: .LBB172_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a3, .LBB172_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB172_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB172_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB172_1
; RV64I-NEXT: .LBB172_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -22934,24 +23270,27 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB173_2
; RV32I-NEXT: .LBB173_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB173_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB173_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB173_4
; RV32I-NEXT: .LBB173_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -22962,10 +23301,10 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB173_1
; RV32I-NEXT: .LBB173_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_acq_rel:
@@ -22986,30 +23325,34 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB173_2
; RV64I-NEXT: .LBB173_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB173_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB173_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB173_4
; RV64I-NEXT: .LBB173_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a3, .LBB173_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB173_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB173_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB173_1
; RV64I-NEXT: .LBB173_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23033,24 +23376,27 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB174_2
; RV32I-NEXT: .LBB174_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB174_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB174_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB174_4
; RV32I-NEXT: .LBB174_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23061,10 +23407,10 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB174_1
; RV32I-NEXT: .LBB174_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_min_i32_seq_cst:
@@ -23085,30 +23431,34 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB174_2
; RV64I-NEXT: .LBB174_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB174_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB174_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB174_4
; RV64I-NEXT: .LBB174_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a3, .LBB174_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB174_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB174_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB174_1
; RV64I-NEXT: .LBB174_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23132,24 +23482,27 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB175_2
; RV32I-NEXT: .LBB175_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB175_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB175_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB175_4
; RV32I-NEXT: .LBB175_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23160,10 +23513,10 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB175_1
; RV32I-NEXT: .LBB175_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i32_monotonic:
@@ -23179,30 +23532,34 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB175_2
; RV64I-NEXT: .LBB175_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB175_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB175_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB175_4
; RV64I-NEXT: .LBB175_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a3, .LBB175_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB175_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB175_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB175_1
; RV64I-NEXT: .LBB175_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23221,24 +23578,27 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB176_2
; RV32I-NEXT: .LBB176_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB176_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB176_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB176_4
; RV32I-NEXT: .LBB176_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23249,10 +23609,10 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB176_1
; RV32I-NEXT: .LBB176_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acquire:
@@ -23273,30 +23633,34 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB176_2
; RV64I-NEXT: .LBB176_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB176_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB176_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB176_4
; RV64I-NEXT: .LBB176_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a3, .LBB176_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB176_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB176_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB176_1
; RV64I-NEXT: .LBB176_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23320,24 +23684,27 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB177_2
; RV32I-NEXT: .LBB177_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB177_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB177_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB177_4
; RV32I-NEXT: .LBB177_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23348,10 +23715,10 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB177_1
; RV32I-NEXT: .LBB177_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_release:
@@ -23372,30 +23739,34 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB177_2
; RV64I-NEXT: .LBB177_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB177_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB177_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB177_4
; RV64I-NEXT: .LBB177_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a3, .LBB177_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB177_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB177_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB177_1
; RV64I-NEXT: .LBB177_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23419,24 +23790,27 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB178_2
; RV32I-NEXT: .LBB178_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB178_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB178_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB178_4
; RV32I-NEXT: .LBB178_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23447,10 +23821,10 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB178_1
; RV32I-NEXT: .LBB178_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_acq_rel:
@@ -23471,30 +23845,34 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB178_2
; RV64I-NEXT: .LBB178_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB178_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB178_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB178_4
; RV64I-NEXT: .LBB178_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a3, .LBB178_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB178_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB178_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB178_1
; RV64I-NEXT: .LBB178_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23518,24 +23896,27 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB179_2
; RV32I-NEXT: .LBB179_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB179_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB179_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB179_4
; RV32I-NEXT: .LBB179_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23546,10 +23927,10 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB179_1
; RV32I-NEXT: .LBB179_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umax_i32_seq_cst:
@@ -23570,30 +23951,34 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB179_2
; RV64I-NEXT: .LBB179_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB179_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB179_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB179_4
; RV64I-NEXT: .LBB179_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a3, .LBB179_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB179_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB179_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB179_1
; RV64I-NEXT: .LBB179_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23617,24 +24002,27 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB180_2
; RV32I-NEXT: .LBB180_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB180_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB180_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB180_4
; RV32I-NEXT: .LBB180_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23645,10 +24033,10 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB180_1
; RV32I-NEXT: .LBB180_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i32_monotonic:
@@ -23664,30 +24052,34 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB180_2
; RV64I-NEXT: .LBB180_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB180_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB180_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB180_4
; RV64I-NEXT: .LBB180_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a3, .LBB180_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB180_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB180_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB180_1
; RV64I-NEXT: .LBB180_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23706,24 +24098,27 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB181_2
; RV32I-NEXT: .LBB181_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB181_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB181_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB181_4
; RV32I-NEXT: .LBB181_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23734,10 +24129,10 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB181_1
; RV32I-NEXT: .LBB181_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acquire:
@@ -23758,30 +24153,34 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB181_2
; RV64I-NEXT: .LBB181_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB181_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB181_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB181_4
; RV64I-NEXT: .LBB181_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a3, .LBB181_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB181_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB181_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB181_1
; RV64I-NEXT: .LBB181_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23805,24 +24204,27 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB182_2
; RV32I-NEXT: .LBB182_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB182_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB182_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB182_4
; RV32I-NEXT: .LBB182_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23833,10 +24235,10 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB182_1
; RV32I-NEXT: .LBB182_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_release:
@@ -23857,30 +24259,34 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB182_2
; RV64I-NEXT: .LBB182_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB182_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB182_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB182_4
; RV64I-NEXT: .LBB182_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a3, .LBB182_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB182_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB182_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB182_1
; RV64I-NEXT: .LBB182_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -23904,24 +24310,27 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB183_2
; RV32I-NEXT: .LBB183_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB183_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB183_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB183_4
; RV32I-NEXT: .LBB183_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -23932,10 +24341,10 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB183_1
; RV32I-NEXT: .LBB183_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_acq_rel:
@@ -23956,30 +24365,34 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB183_2
; RV64I-NEXT: .LBB183_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB183_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB183_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB183_4
; RV64I-NEXT: .LBB183_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a3, .LBB183_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB183_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB183_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB183_1
; RV64I-NEXT: .LBB183_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -24003,24 +24416,27 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB184_2
; RV32I-NEXT: .LBB184_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB184_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB184_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB184_4
; RV32I-NEXT: .LBB184_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -24031,10 +24447,10 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB184_1
; RV32I-NEXT: .LBB184_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-WMO-LABEL: atomicrmw_umin_i32_seq_cst:
@@ -24055,30 +24471,34 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB184_2
; RV64I-NEXT: .LBB184_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB184_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB184_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB184_4
; RV64I-NEXT: .LBB184_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a3, .LBB184_1
+; RV64I-NEXT: sext.w a1, a2
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB184_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB184_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB184_1
; RV64I-NEXT: .LBB184_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -26053,11 +26473,11 @@ define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -26066,16 +26486,21 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB220_2
; RV32I-NEXT: .LBB220_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB220_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB220_7
; RV32I-NEXT: .LBB220_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB220_4
@@ -26098,20 +26523,20 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB220_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -26120,16 +26545,21 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB220_2
; RV32IA-NEXT: .LBB220_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB220_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB220_7
; RV32IA-NEXT: .LBB220_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB220_4
@@ -26152,33 +26582,35 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB220_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB220_2
; RV64I-NEXT: .LBB220_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB220_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB220_4
; RV64I-NEXT: .LBB220_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -26189,10 +26621,10 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB220_1
; RV64I-NEXT: .LBB220_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_max_i64_monotonic:
@@ -26206,11 +26638,11 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -26219,16 +26651,21 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB221_2
; RV32I-NEXT: .LBB221_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB221_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB221_7
; RV32I-NEXT: .LBB221_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB221_4
@@ -26251,20 +26688,20 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB221_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -26273,16 +26710,21 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB221_2
; RV32IA-NEXT: .LBB221_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB221_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB221_7
; RV32IA-NEXT: .LBB221_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB221_4
@@ -26305,33 +26747,35 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB221_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB221_2
; RV64I-NEXT: .LBB221_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB221_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB221_4
; RV64I-NEXT: .LBB221_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -26342,10 +26786,10 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB221_1
; RV64I-NEXT: .LBB221_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_acquire:
@@ -26364,11 +26808,11 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -26377,16 +26821,21 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB222_2
; RV32I-NEXT: .LBB222_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB222_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB222_7
; RV32I-NEXT: .LBB222_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB222_4
@@ -26409,20 +26858,20 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB222_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_release:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -26431,16 +26880,21 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB222_2
; RV32IA-NEXT: .LBB222_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB222_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB222_7
; RV32IA-NEXT: .LBB222_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB222_4
@@ -26463,33 +26917,35 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB222_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_release:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB222_2
; RV64I-NEXT: .LBB222_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB222_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB222_4
; RV64I-NEXT: .LBB222_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -26500,10 +26956,10 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB222_1
; RV64I-NEXT: .LBB222_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_release:
@@ -26522,11 +26978,11 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -26535,16 +26991,21 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB223_2
; RV32I-NEXT: .LBB223_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB223_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB223_7
; RV32I-NEXT: .LBB223_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB223_4
@@ -26567,20 +27028,20 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB223_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_acq_rel:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -26589,16 +27050,21 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB223_2
; RV32IA-NEXT: .LBB223_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB223_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB223_7
; RV32IA-NEXT: .LBB223_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB223_4
@@ -26621,33 +27087,35 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB223_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_acq_rel:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB223_2
; RV64I-NEXT: .LBB223_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB223_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB223_4
; RV64I-NEXT: .LBB223_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -26658,10 +27126,10 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB223_1
; RV64I-NEXT: .LBB223_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_acq_rel:
@@ -26680,11 +27148,11 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -26693,16 +27161,21 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB224_2
; RV32I-NEXT: .LBB224_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB224_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB224_7
; RV32I-NEXT: .LBB224_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB224_4
@@ -26725,20 +27198,20 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB224_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_seq_cst:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -26747,16 +27220,21 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB224_2
; RV32IA-NEXT: .LBB224_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB224_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB224_7
; RV32IA-NEXT: .LBB224_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB224_4
@@ -26779,33 +27257,35 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB224_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB224_2
; RV64I-NEXT: .LBB224_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB224_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB224_4
; RV64I-NEXT: .LBB224_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -26816,10 +27296,10 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB224_1
; RV64I-NEXT: .LBB224_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_max_i64_seq_cst:
@@ -26838,11 +27318,11 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -26851,16 +27331,21 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB225_2
; RV32I-NEXT: .LBB225_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB225_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB225_7
; RV32I-NEXT: .LBB225_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB225_4
@@ -26883,20 +27368,20 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB225_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -26905,16 +27390,21 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB225_2
; RV32IA-NEXT: .LBB225_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB225_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB225_7
; RV32IA-NEXT: .LBB225_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB225_4
@@ -26937,33 +27427,35 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB225_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB225_2
; RV64I-NEXT: .LBB225_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB225_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB225_4
; RV64I-NEXT: .LBB225_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -26974,10 +27466,10 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB225_1
; RV64I-NEXT: .LBB225_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_min_i64_monotonic:
@@ -26991,11 +27483,11 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27004,16 +27496,21 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB226_2
; RV32I-NEXT: .LBB226_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB226_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB226_7
; RV32I-NEXT: .LBB226_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB226_4
@@ -27036,20 +27533,20 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB226_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -27058,16 +27555,21 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB226_2
; RV32IA-NEXT: .LBB226_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB226_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB226_7
; RV32IA-NEXT: .LBB226_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB226_4
@@ -27090,33 +27592,35 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB226_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB226_2
; RV64I-NEXT: .LBB226_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB226_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB226_4
; RV64I-NEXT: .LBB226_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -27127,10 +27631,10 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB226_1
; RV64I-NEXT: .LBB226_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_acquire:
@@ -27149,11 +27653,11 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27162,16 +27666,21 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB227_2
; RV32I-NEXT: .LBB227_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB227_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB227_7
; RV32I-NEXT: .LBB227_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB227_4
@@ -27194,20 +27703,20 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB227_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_release:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -27216,16 +27725,21 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB227_2
; RV32IA-NEXT: .LBB227_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB227_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB227_7
; RV32IA-NEXT: .LBB227_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB227_4
@@ -27248,33 +27762,35 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB227_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_release:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB227_2
; RV64I-NEXT: .LBB227_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB227_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB227_4
; RV64I-NEXT: .LBB227_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -27285,10 +27801,10 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB227_1
; RV64I-NEXT: .LBB227_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_release:
@@ -27307,11 +27823,11 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27320,16 +27836,21 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB228_2
; RV32I-NEXT: .LBB228_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB228_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB228_7
; RV32I-NEXT: .LBB228_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB228_4
@@ -27352,20 +27873,20 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB228_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_acq_rel:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -27374,16 +27895,21 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB228_2
; RV32IA-NEXT: .LBB228_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB228_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB228_7
; RV32IA-NEXT: .LBB228_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB228_4
@@ -27406,33 +27932,35 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB228_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_acq_rel:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB228_2
; RV64I-NEXT: .LBB228_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB228_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB228_4
; RV64I-NEXT: .LBB228_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -27443,10 +27971,10 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB228_1
; RV64I-NEXT: .LBB228_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_acq_rel:
@@ -27465,11 +27993,11 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27478,16 +28006,21 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB229_2
; RV32I-NEXT: .LBB229_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB229_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB229_7
; RV32I-NEXT: .LBB229_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB229_4
@@ -27510,20 +28043,20 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB229_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_seq_cst:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -27532,16 +28065,21 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB229_2
; RV32IA-NEXT: .LBB229_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB229_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB229_7
; RV32IA-NEXT: .LBB229_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB229_4
@@ -27564,33 +28102,35 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB229_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB229_2
; RV64I-NEXT: .LBB229_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB229_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB229_4
; RV64I-NEXT: .LBB229_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -27601,10 +28141,10 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB229_1
; RV64I-NEXT: .LBB229_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_min_i64_seq_cst:
@@ -27623,11 +28163,11 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27636,16 +28176,21 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB230_2
; RV32I-NEXT: .LBB230_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB230_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB230_7
; RV32I-NEXT: .LBB230_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB230_4
@@ -27668,20 +28213,20 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB230_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -27690,16 +28235,21 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB230_2
; RV32IA-NEXT: .LBB230_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB230_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB230_7
; RV32IA-NEXT: .LBB230_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB230_4
@@ -27722,33 +28272,35 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB230_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB230_2
; RV64I-NEXT: .LBB230_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB230_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB230_4
; RV64I-NEXT: .LBB230_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -27759,10 +28311,10 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB230_1
; RV64I-NEXT: .LBB230_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umax_i64_monotonic:
@@ -27776,11 +28328,11 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27789,16 +28341,21 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB231_2
; RV32I-NEXT: .LBB231_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB231_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB231_7
; RV32I-NEXT: .LBB231_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB231_4
@@ -27821,20 +28378,20 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB231_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -27843,16 +28400,21 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB231_2
; RV32IA-NEXT: .LBB231_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB231_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB231_7
; RV32IA-NEXT: .LBB231_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB231_4
@@ -27875,33 +28437,35 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB231_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB231_2
; RV64I-NEXT: .LBB231_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB231_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB231_4
; RV64I-NEXT: .LBB231_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -27912,10 +28476,10 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB231_1
; RV64I-NEXT: .LBB231_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acquire:
@@ -27934,11 +28498,11 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -27947,16 +28511,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB232_2
; RV32I-NEXT: .LBB232_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB232_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB232_7
; RV32I-NEXT: .LBB232_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB232_4
@@ -27979,20 +28548,20 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB232_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_release:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28001,16 +28570,21 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB232_2
; RV32IA-NEXT: .LBB232_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB232_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB232_7
; RV32IA-NEXT: .LBB232_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB232_4
@@ -28033,33 +28607,35 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB232_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_release:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB232_2
; RV64I-NEXT: .LBB232_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB232_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB232_4
; RV64I-NEXT: .LBB232_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -28070,10 +28646,10 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB232_1
; RV64I-NEXT: .LBB232_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_release:
@@ -28092,11 +28668,11 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -28105,16 +28681,21 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB233_2
; RV32I-NEXT: .LBB233_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB233_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB233_7
; RV32I-NEXT: .LBB233_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB233_4
@@ -28137,20 +28718,20 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB233_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_acq_rel:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28159,16 +28740,21 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB233_2
; RV32IA-NEXT: .LBB233_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB233_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB233_7
; RV32IA-NEXT: .LBB233_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB233_4
@@ -28191,33 +28777,35 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB233_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_acq_rel:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB233_2
; RV64I-NEXT: .LBB233_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB233_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB233_4
; RV64I-NEXT: .LBB233_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -28228,10 +28816,10 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB233_1
; RV64I-NEXT: .LBB233_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_acq_rel:
@@ -28250,11 +28838,11 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -28263,16 +28851,21 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB234_2
; RV32I-NEXT: .LBB234_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB234_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB234_7
; RV32I-NEXT: .LBB234_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB234_4
@@ -28295,20 +28888,20 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB234_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_seq_cst:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28317,16 +28910,21 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB234_2
; RV32IA-NEXT: .LBB234_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB234_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB234_7
; RV32IA-NEXT: .LBB234_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB234_4
@@ -28349,33 +28947,35 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB234_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB234_2
; RV64I-NEXT: .LBB234_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB234_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB234_4
; RV64I-NEXT: .LBB234_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -28386,10 +28986,10 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB234_1
; RV64I-NEXT: .LBB234_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umax_i64_seq_cst:
@@ -28408,11 +29008,11 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -28421,16 +29021,21 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB235_2
; RV32I-NEXT: .LBB235_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB235_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB235_7
; RV32I-NEXT: .LBB235_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB235_4
@@ -28453,20 +29058,20 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB235_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28475,16 +29080,21 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB235_2
; RV32IA-NEXT: .LBB235_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB235_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB235_7
; RV32IA-NEXT: .LBB235_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB235_4
@@ -28507,33 +29117,35 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB235_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB235_2
; RV64I-NEXT: .LBB235_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB235_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB235_4
; RV64I-NEXT: .LBB235_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -28544,10 +29156,10 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB235_1
; RV64I-NEXT: .LBB235_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umin_i64_monotonic:
@@ -28561,11 +29173,11 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -28574,16 +29186,21 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB236_2
; RV32I-NEXT: .LBB236_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB236_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB236_7
; RV32I-NEXT: .LBB236_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB236_4
@@ -28606,20 +29223,20 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB236_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28628,16 +29245,21 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB236_2
; RV32IA-NEXT: .LBB236_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB236_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB236_7
; RV32IA-NEXT: .LBB236_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB236_4
@@ -28660,33 +29282,35 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB236_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB236_2
; RV64I-NEXT: .LBB236_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB236_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB236_4
; RV64I-NEXT: .LBB236_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -28697,10 +29321,10 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB236_1
; RV64I-NEXT: .LBB236_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acquire:
@@ -28719,11 +29343,11 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_release:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -28732,16 +29356,21 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB237_2
; RV32I-NEXT: .LBB237_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB237_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB237_7
; RV32I-NEXT: .LBB237_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB237_4
@@ -28764,20 +29393,20 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB237_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_release:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28786,16 +29415,21 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB237_2
; RV32IA-NEXT: .LBB237_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB237_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB237_7
; RV32IA-NEXT: .LBB237_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB237_4
@@ -28818,33 +29452,35 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB237_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_release:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB237_2
; RV64I-NEXT: .LBB237_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB237_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB237_4
; RV64I-NEXT: .LBB237_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -28855,10 +29491,10 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB237_1
; RV64I-NEXT: .LBB237_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_release:
@@ -28877,11 +29513,11 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_acq_rel:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -28890,16 +29526,21 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB238_2
; RV32I-NEXT: .LBB238_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB238_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB238_7
; RV32I-NEXT: .LBB238_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB238_4
@@ -28922,20 +29563,20 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB238_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_acq_rel:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -28944,16 +29585,21 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB238_2
; RV32IA-NEXT: .LBB238_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB238_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB238_7
; RV32IA-NEXT: .LBB238_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB238_4
@@ -28976,33 +29622,35 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB238_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_acq_rel:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB238_2
; RV64I-NEXT: .LBB238_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB238_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB238_4
; RV64I-NEXT: .LBB238_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -29013,10 +29661,10 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB238_1
; RV64I-NEXT: .LBB238_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_acq_rel:
@@ -29035,11 +29683,11 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -29048,16 +29696,21 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB239_2
; RV32I-NEXT: .LBB239_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB239_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB239_7
; RV32I-NEXT: .LBB239_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB239_4
@@ -29080,20 +29733,20 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB239_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_seq_cst:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -29102,16 +29755,21 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB239_2
; RV32IA-NEXT: .LBB239_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB239_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB239_7
; RV32IA-NEXT: .LBB239_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB239_4
@@ -29134,33 +29792,35 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB239_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB239_2
; RV64I-NEXT: .LBB239_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB239_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB239_4
; RV64I-NEXT: .LBB239_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -29171,10 +29831,10 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB239_1
; RV64I-NEXT: .LBB239_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: atomicrmw_umin_i64_seq_cst:
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 775c17c3ceb3f9..da17f5e4e8351a 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -595,13 +595,16 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB10_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB10_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB10_4
; RV32I-NEXT: .LBB10_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -661,33 +664,37 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB10_2
; RV64I-NEXT: .LBB10_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB10_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB10_4
; RV64I-NEXT: .LBB10_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB10_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB10_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB10_1
; RV64I-NEXT: .LBB10_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: slli a0, a2, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -747,13 +754,16 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB11_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB11_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB11_4
; RV32I-NEXT: .LBB11_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -813,33 +823,37 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB11_2
; RV64I-NEXT: .LBB11_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB11_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB11_4
; RV64I-NEXT: .LBB11_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB11_1
+; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: srai a1, a0, 56
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB11_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB11_1
; RV64I-NEXT: .LBB11_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: slli a0, a2, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -898,13 +912,16 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB12_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB12_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB12_4
; RV32I-NEXT: .LBB12_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -958,31 +975,35 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB12_2
; RV64I-NEXT: .LBB12_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB12_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB12_4
; RV64I-NEXT: .LBB12_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a0, .LBB12_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a1, .LBB12_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB12_1
; RV64I-NEXT: .LBB12_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: slli a0, a2, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1036,13 +1057,16 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB13_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB13_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB13_4
; RV32I-NEXT: .LBB13_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -1096,31 +1120,35 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB13_2
; RV64I-NEXT: .LBB13_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB13_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB13_4
; RV64I-NEXT: .LBB13_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a0, .LBB13_1
+; RV64I-NEXT: andi a1, a2, 255
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a1, .LBB13_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB13_1
; RV64I-NEXT: .LBB13_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: slli a0, a2, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1643,13 +1671,16 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB21_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB21_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB21_4
; RV32I-NEXT: .LBB21_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -1711,33 +1742,37 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB21_2
; RV64I-NEXT: .LBB21_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB21_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB21_4
; RV64I-NEXT: .LBB21_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a0, .LBB21_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a1, .LBB21_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB21_1
; RV64I-NEXT: .LBB21_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a3, 48
+; RV64I-NEXT: slli a0, a2, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1799,13 +1834,16 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB22_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: bnez a0, .LBB22_4
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB22_4
; RV32I-NEXT: .LBB22_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -1867,33 +1905,37 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB22_2
; RV64I-NEXT: .LBB22_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: bnez a0, .LBB22_4
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: beqz a0, .LBB22_4
; RV64I-NEXT: .LBB22_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a3, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a0, .LBB22_1
+; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: srai a1, a0, 48
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a1, .LBB22_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB22_1
; RV64I-NEXT: .LBB22_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a3, 48
+; RV64I-NEXT: slli a0, a2, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1957,13 +1999,16 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB23_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB23_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB23_4
; RV32I-NEXT: .LBB23_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -2029,21 +2074,25 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB23_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB23_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB23_4
; RV64I-NEXT: .LBB23_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bltu s3, a0, .LBB23_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bltu s3, a2, .LBB23_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB23_1
; RV64I-NEXT: .LBB23_4: # %atomicrmw.end
; RV64I-NEXT: slli a0, a1, 48
@@ -2105,13 +2154,16 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB24_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB24_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB24_4
; RV32I-NEXT: .LBB24_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -2177,21 +2229,25 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB24_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB24_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB24_4
; RV64I-NEXT: .LBB24_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a1, s2
-; RV64I-NEXT: mv a2, a1
-; RV64I-NEXT: bgeu s3, a0, .LBB24_1
+; RV64I-NEXT: and a2, a1, s2
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: bgeu s3, a2, .LBB24_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1
-; RV64I-NEXT: mv a2, s0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: j .LBB24_1
; RV64I-NEXT: .LBB24_4: # %atomicrmw.end
; RV64I-NEXT: slli a0, a1, 48
@@ -2526,24 +2582,27 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB32_2
; RV32I-NEXT: .LBB32_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB32_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB32_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB32_4
; RV32I-NEXT: .LBB32_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2554,10 +2613,10 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB32_1
; RV32I-NEXT: .LBB32_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i32_monotonic:
@@ -2573,30 +2632,33 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB32_2
; RV64I-NEXT: .LBB32_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB32_4
+; RV64I-NEXT: lw a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB32_4
; RV64I-NEXT: .LBB32_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: blt s2, a3, .LBB32_1
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: blt s2, a2, .LBB32_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB32_1
; RV64I-NEXT: .LBB32_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -2615,24 +2677,27 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB33_2
; RV32I-NEXT: .LBB33_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB33_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB33_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB33_4
; RV32I-NEXT: .LBB33_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2643,10 +2708,10 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB33_1
; RV32I-NEXT: .LBB33_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i32_monotonic:
@@ -2662,30 +2727,33 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB33_2
; RV64I-NEXT: .LBB33_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB33_4
+; RV64I-NEXT: lw a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB33_4
; RV64I-NEXT: .LBB33_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bge s2, a3, .LBB33_1
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bge s2, a2, .LBB33_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB33_1
; RV64I-NEXT: .LBB33_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -2704,24 +2772,27 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB34_2
; RV32I-NEXT: .LBB34_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB34_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB34_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB34_4
; RV32I-NEXT: .LBB34_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2732,10 +2803,10 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB34_1
; RV32I-NEXT: .LBB34_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i32_monotonic:
@@ -2751,30 +2822,33 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB34_2
; RV64I-NEXT: .LBB34_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB34_4
+; RV64I-NEXT: lw a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB34_4
; RV64I-NEXT: .LBB34_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bltu s2, a3, .LBB34_1
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bltu s2, a2, .LBB34_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB34_1
; RV64I-NEXT: .LBB34_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -2793,24 +2867,27 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB35_2
; RV32I-NEXT: .LBB35_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB35_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB35_4
; RV32I-NEXT: .LBB35_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2821,10 +2898,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB35_1
; RV32I-NEXT: .LBB35_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i32_monotonic:
@@ -2840,30 +2917,33 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB35_2
; RV64I-NEXT: .LBB35_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB35_4
+; RV64I-NEXT: lw a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB35_4
; RV64I-NEXT: .LBB35_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: bgeu s2, a3, .LBB35_1
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: bgeu s2, a2, .LBB35_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1
-; RV64I-NEXT: mv a2, s1
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: j .LBB35_1
; RV64I-NEXT: .LBB35_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -3177,11 +3257,11 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -3190,16 +3270,21 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB43_2
; RV32I-NEXT: .LBB43_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB43_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB43_7
; RV32I-NEXT: .LBB43_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB43_4
@@ -3222,20 +3307,20 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB43_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -3244,16 +3329,21 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB43_2
; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB43_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB43_7
; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB43_4
@@ -3276,33 +3366,35 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB43_2
; RV64I-NEXT: .LBB43_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB43_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB43_4
; RV64I-NEXT: .LBB43_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3313,10 +3405,10 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB43_1
; RV64I-NEXT: .LBB43_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_max_i64_monotonic:
@@ -3330,11 +3422,11 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -3343,16 +3435,21 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB44_2
; RV32I-NEXT: .LBB44_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB44_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB44_7
; RV32I-NEXT: .LBB44_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB44_4
@@ -3375,20 +3472,20 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB44_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -3397,16 +3494,21 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB44_2
; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB44_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB44_7
; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB44_4
@@ -3429,33 +3531,35 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB44_2
; RV64I-NEXT: .LBB44_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB44_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB44_4
; RV64I-NEXT: .LBB44_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3466,10 +3570,10 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB44_1
; RV64I-NEXT: .LBB44_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_min_i64_monotonic:
@@ -3483,11 +3587,11 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -3496,16 +3600,21 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB45_2
; RV32I-NEXT: .LBB45_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB45_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB45_7
; RV32I-NEXT: .LBB45_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB45_4
@@ -3528,20 +3637,20 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB45_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -3550,16 +3659,21 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB45_2
; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB45_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB45_7
; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB45_4
@@ -3582,33 +3696,35 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB45_2
; RV64I-NEXT: .LBB45_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB45_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB45_4
; RV64I-NEXT: .LBB45_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3619,10 +3735,10 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB45_1
; RV64I-NEXT: .LBB45_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umax_i64_monotonic:
@@ -3636,11 +3752,11 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
@@ -3649,16 +3765,21 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB46_2
; RV32I-NEXT: .LBB46_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB46_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB46_7
; RV32I-NEXT: .LBB46_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB46_4
@@ -3681,20 +3802,20 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: .LBB46_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: lw a4, 0(a0)
@@ -3703,16 +3824,21 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB46_2
; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB46_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB46_7
; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB46_4
@@ -3735,33 +3861,35 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB46_2
; RV64I-NEXT: .LBB46_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB46_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB46_4
; RV64I-NEXT: .LBB46_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3772,10 +3900,10 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB46_1
; RV64I-NEXT: .LBB46_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umin_i64_monotonic:
@@ -3792,11 +3920,14 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lb a0, 11(sp)
+; RV32I-NEXT: lbu a0, 11(sp)
+; RV32I-NEXT: sb a0, 9(sp)
+; RV32I-NEXT: lb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3832,11 +3963,14 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lb a0, 7(sp)
+; RV64I-NEXT: lbu a0, 7(sp)
+; RV64I-NEXT: sb a0, 5(sp)
+; RV64I-NEXT: lb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3877,10 +4011,15 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
+; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a1, 11(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: sb a1, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3916,10 +4055,14 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
+; RV64I-NEXT: sb a2, 6(sp)
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a1, 7(sp)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: sb a1, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3960,11 +4103,14 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
; RV32I-NEXT: lh a0, 10(sp)
+; RV32I-NEXT: sh a0, 6(sp)
+; RV32I-NEXT: lh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4001,11 +4147,14 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
; RV64I-NEXT: lh a0, 6(sp)
+; RV64I-NEXT: sh a0, 2(sp)
+; RV64I-NEXT: lh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -4047,10 +4196,15 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: sh a1, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4087,10 +4241,14 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a2, 4(sp)
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a1, 6(sp)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: sh a1, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -4132,11 +4290,13 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext %
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4161,16 +4321,19 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext %
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 4(sp)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: lw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-NOZACAS-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
@@ -4201,10 +4364,15 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4232,15 +4400,19 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a1, 20(sp)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: sw a1, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-NOZACAS-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
@@ -4946,9 +5118,9 @@ merge:
define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB60_5
@@ -4957,14 +5129,17 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: j .LBB60_3
; RV32I-NEXT: .LBB60_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB60_3 Depth=1
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: addi a1, sp, 4
+; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 20
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 4(sp)
-; RV32I-NEXT: bnez a0, .LBB60_8
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: beqz a0, .LBB60_8
; RV32I-NEXT: .LBB60_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a1
@@ -4983,9 +5158,9 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB60_8: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i32_monotonic_crossbb:
@@ -5021,13 +5196,15 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: .LBB60_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB60_3 Depth=1
; RV64I-NEXT: sw a1, 12(sp)
+; RV64I-NEXT: sw a2, 8(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a1, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB60_8
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB60_8
; RV64I-NEXT: .LBB60_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: li a0, 1
@@ -5091,10 +5268,10 @@ declare i32 @llvm.smax.i32(i32, i32)
define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB61_5
@@ -5104,14 +5281,17 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: j .LBB61_3
; RV32I-NEXT: .LBB61_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB61_3 Depth=1
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB61_8
+; RV32I-NEXT: lw a1, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB61_8
; RV32I-NEXT: .LBB61_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a1
@@ -5130,10 +5310,10 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB61_8: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i32_monotonic_crossbb:
@@ -5157,10 +5337,10 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
;
; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: beqz a1, .LBB61_5
@@ -5170,14 +5350,16 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: j .LBB61_3
; RV64I-NEXT: .LBB61_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB61_3 Depth=1
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a1, 4(sp)
-; RV64I-NEXT: bnez a0, .LBB61_8
+; RV64I-NEXT: lw a1, 20(sp)
+; RV64I-NEXT: sw a1, 12(sp)
+; RV64I-NEXT: beqz a0, .LBB61_8
; RV64I-NEXT: .LBB61_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a1
@@ -5196,10 +5378,10 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: sw a0, 0(s0)
; RV64I-NEXT: .LBB61_8: # %merge
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_min_i32_monotonic_crossbb:
@@ -5241,9 +5423,9 @@ declare i32 @llvm.smin.i32(i32, i32)
define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB62_3
@@ -5253,14 +5435,17 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: seqz a2, a1
; RV32I-NEXT: add a2, a1, a2
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: addi a1, sp, 4
+; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 20
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 4(sp)
-; RV32I-NEXT: beqz a0, .LBB62_2
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: bnez a0, .LBB62_2
; RV32I-NEXT: j .LBB62_4
; RV32I-NEXT: .LBB62_3: # %else
; RV32I-NEXT: lw a1, 0(s0)
@@ -5269,9 +5454,9 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB62_4: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
@@ -5304,13 +5489,15 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: .LBB62_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB62_3 Depth=1
; RV64I-NEXT: sw a1, 12(sp)
+; RV64I-NEXT: sw a2, 8(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a1, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB62_6
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB62_6
; RV64I-NEXT: .LBB62_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: li a0, 1
@@ -5368,10 +5555,10 @@ declare i32 @llvm.umax.i32(i32, i32)
define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB63_5
@@ -5381,14 +5568,17 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: j .LBB63_3
; RV32I-NEXT: .LBB63_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB63_3 Depth=1
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB63_8
+; RV32I-NEXT: lw a1, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB63_8
; RV32I-NEXT: .LBB63_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a1
@@ -5408,10 +5598,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB63_8: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
@@ -5436,10 +5626,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
;
; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: beqz a1, .LBB63_5
@@ -5449,14 +5639,16 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: j .LBB63_3
; RV64I-NEXT: .LBB63_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB63_3 Depth=1
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a1, 4(sp)
-; RV64I-NEXT: bnez a0, .LBB63_8
+; RV64I-NEXT: lw a1, 20(sp)
+; RV64I-NEXT: sw a1, 12(sp)
+; RV64I-NEXT: beqz a0, .LBB63_8
; RV64I-NEXT: .LBB63_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a1
@@ -5476,10 +5668,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: sw a0, 0(s0)
; RV64I-NEXT: .LBB63_8: # %merge
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
@@ -5527,11 +5719,13 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a0, 8(sp)
+; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -5573,16 +5767,18 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a3, .LBB64_2
; RV64I-NEXT: # %bb.1: # %then
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 4(sp)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: lw a0, 20(sp)
+; RV64I-NEXT: sw a0, 12(sp)
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB64_2: # %else
; RV64I-NEXT: lw a0, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index 634ed45044ee21..3d856c3af668d1 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -35,13 +35,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: and a2, a2, a0
; RV32I-NEXT: sb a3, 3(sp)
+; RV32I-NEXT: sb a2, 2(sp)
; RV32I-NEXT: addi a1, sp, 3
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 3(sp)
-; RV32I-NEXT: beqz a0, .LBB0_1
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 1(sp)
+; RV32I-NEXT: bnez a0, .LBB0_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -99,25 +102,29 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: andi s1, a1, 255
; RV64I-NEXT: .LBB0_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: addi a0, a3, 1
-; RV64I-NEXT: andi a1, a3, 255
+; RV64I-NEXT: addi a0, a2, 1
+; RV64I-NEXT: andi a1, a2, 255
; RV64I-NEXT: sltu a1, a1, s1
-; RV64I-NEXT: neg a2, a1
-; RV64I-NEXT: and a2, a2, a0
-; RV64I-NEXT: sb a3, 7(sp)
+; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: sb a2, 7(sp)
+; RV64I-NEXT: sb a0, 6(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 7(sp)
-; RV64I-NEXT: beqz a0, .LBB0_1
+; RV64I-NEXT: lbu a2, 7(sp)
+; RV64I-NEXT: sb a2, 5(sp)
+; RV64I-NEXT: bnez a0, .LBB0_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -191,13 +198,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; RV32I-NEXT: neg a2, a0
; RV32I-NEXT: and a2, a2, a1
; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a3, 14(sp)
-; RV32I-NEXT: beqz a0, .LBB1_1
+; RV32I-NEXT: lhu a3, 14(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a3, 10(sp)
+; RV32I-NEXT: bnez a0, .LBB1_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -259,27 +269,31 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
; RV64I-NEXT: lui s1, 16
; RV64I-NEXT: addiw s1, s1, -1
; RV64I-NEXT: and s2, a1, s1
; RV64I-NEXT: .LBB1_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a0, a3, s1
-; RV64I-NEXT: addi a1, a3, 1
+; RV64I-NEXT: and a0, a2, s1
+; RV64I-NEXT: addi a1, a2, 1
; RV64I-NEXT: sltu a0, a0, s2
-; RV64I-NEXT: neg a2, a0
-; RV64I-NEXT: and a2, a2, a1
-; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sh a2, 14(sp)
+; RV64I-NEXT: sh a0, 12(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a3, 14(sp)
-; RV64I-NEXT: beqz a0, .LBB1_1
+; RV64I-NEXT: lhu a2, 14(sp)
+; RV64I-NEXT: sh a2, 10(sp)
+; RV64I-NEXT: bnez a0, .LBB1_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -332,11 +346,11 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_uinc_wrap_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
@@ -349,20 +363,23 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32I-NEXT: sltu a1, a3, s1
; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: and a2, a2, a0
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: beqz a0, .LBB2_1
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: bnez a0, .LBB2_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_uinc_wrap_i32:
@@ -392,11 +409,11 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
;
; RV64I-LABEL: atomicrmw_uinc_wrap_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
@@ -406,23 +423,26 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV64I-NEXT: .LBB2_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: addiw a0, a3, 1
-; RV64I-NEXT: sltu a1, a3, s1
+; RV64I-NEXT: sext.w a1, a3
+; RV64I-NEXT: sltu a1, a1, s1
; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a2, a2, a0
-; RV64I-NEXT: sw a3, 4(sp)
-; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: sw a3, 20(sp)
+; RV64I-NEXT: sw a2, 16(sp)
+; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB2_1
+; RV64I-NEXT: lwu a3, 20(sp)
+; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: bnez a0, .LBB2_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_uinc_wrap_i32:
@@ -457,12 +477,12 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_uinc_wrap_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: .cfi_def_cfa_offset 48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
@@ -484,16 +504,21 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a2, a0, a1
; RV32I-NEXT: and a3, a0, a3
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB3_5
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB3_5
; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: bne a5, s1, .LBB3_1
@@ -503,21 +528,21 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .LBB3_5: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_uinc_wrap_i64:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: .cfi_def_cfa_offset 32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: .cfi_def_cfa_offset 48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: .cfi_offset ra, -4
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
@@ -539,16 +564,21 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: neg a0, a0
; RV32IA-NEXT: and a2, a0, a1
; RV32IA-NEXT: and a3, a0, a3
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB3_5
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB3_5
; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: bne a5, s1, .LBB3_1
@@ -558,20 +588,20 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_uinc_wrap_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
@@ -584,20 +614,22 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64I-NEXT: sltu a1, a3, s1
; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a2, a2, a0
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: beqz a0, .LBB3_1
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: bnez a0, .LBB3_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_uinc_wrap_i64:
@@ -649,13 +681,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; RV32I-NEXT: .LBB4_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: bnez a0, .LBB4_4
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sb a3, 13(sp)
+; RV32I-NEXT: beqz a0, .LBB4_4
; RV32I-NEXT: .LBB4_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -738,34 +773,38 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: lbu a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB4_2
; RV64I-NEXT: .LBB4_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1
-; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: sb a2, 15(sp)
+; RV64I-NEXT: sb a0, 14(sp)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a2, a0, 56
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a3, 15(sp)
-; RV64I-NEXT: bnez a0, .LBB4_4
+; RV64I-NEXT: lbu a2, 15(sp)
+; RV64I-NEXT: sb a2, 13(sp)
+; RV64I-NEXT: beqz a0, .LBB4_4
; RV64I-NEXT: .LBB4_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a0, a3, 255
+; RV64I-NEXT: andi a0, a2, 255
; RV64I-NEXT: seqz a1, a0
; RV64I-NEXT: sltu a0, s2, a0
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: mv a2, s1
-; RV64I-NEXT: bnez a0, .LBB4_1
+; RV64I-NEXT: or a1, a1, a0
+; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: bnez a1, .LBB4_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1
-; RV64I-NEXT: addi a2, a3, -1
+; RV64I-NEXT: addi a0, a2, -1
; RV64I-NEXT: j .LBB4_1
; RV64I-NEXT: .LBB4_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -849,13 +888,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; RV32I-NEXT: .LBB5_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: bnez a0, .LBB5_4
+; RV32I-NEXT: lhu a1, 10(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sh a1, 6(sp)
+; RV32I-NEXT: beqz a0, .LBB5_4
; RV32I-NEXT: .LBB5_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -951,24 +993,28 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; RV64I-NEXT: .LBB5_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: sh a0, 4(sp)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: bnez a0, .LBB5_4
+; RV64I-NEXT: lhu a1, 6(sp)
+; RV64I-NEXT: sh a1, 2(sp)
+; RV64I-NEXT: beqz a0, .LBB5_4
; RV64I-NEXT: .LBB5_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: and a0, a1, s2
; RV64I-NEXT: seqz a2, a0
; RV64I-NEXT: sltu a0, s3, a0
-; RV64I-NEXT: or a0, a2, a0
-; RV64I-NEXT: mv a2, s0
-; RV64I-NEXT: bnez a0, .LBB5_1
+; RV64I-NEXT: or a2, a2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: bnez a2, .LBB5_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1
-; RV64I-NEXT: addi a2, a1, -1
+; RV64I-NEXT: addi a0, a1, -1
; RV64I-NEXT: j .LBB5_1
; RV64I-NEXT: .LBB5_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
@@ -1035,11 +1081,11 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_udec_wrap_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: .cfi_def_cfa_offset 16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
@@ -1049,14 +1095,17 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; RV32I-NEXT: j .LBB6_2
; RV32I-NEXT: .LBB6_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 0(sp)
-; RV32I-NEXT: bnez a0, .LBB6_4
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB6_4
; RV32I-NEXT: .LBB6_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: seqz a0, a3
@@ -1070,10 +1119,10 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; RV32I-NEXT: j .LBB6_1
; RV32I-NEXT: .LBB6_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i32:
@@ -1124,33 +1173,37 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a2, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB6_2
; RV64I-NEXT: .LBB6_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: sw a2, 12(sp)
+; RV64I-NEXT: sw a0, 8(sp)
+; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a3, 12(sp)
-; RV64I-NEXT: bnez a0, .LBB6_4
+; RV64I-NEXT: lwu a2, 12(sp)
+; RV64I-NEXT: sw a2, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB6_4
; RV64I-NEXT: .LBB6_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: seqz a0, a3
-; RV64I-NEXT: sltu a1, s2, a3
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: mv a2, s1
-; RV64I-NEXT: bnez a0, .LBB6_1
+; RV64I-NEXT: sext.w a0, a2
+; RV64I-NEXT: seqz a1, a0
+; RV64I-NEXT: sltu a0, s2, a0
+; RV64I-NEXT: or a1, a1, a0
+; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: bnez a1, .LBB6_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV64I-NEXT: addiw a2, a3, -1
+; RV64I-NEXT: addi a0, a2, -1
; RV64I-NEXT: j .LBB6_1
; RV64I-NEXT: .LBB6_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -1200,12 +1253,12 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_udec_wrap_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: .cfi_def_cfa_offset 48
+; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
@@ -1218,16 +1271,21 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a5, 28(sp)
+; RV32I-NEXT: sw a3, 20(sp)
+; RV32I-NEXT: sw a2, 16(sp)
+; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB7_7
+; RV32I-NEXT: lw a5, 28(sp)
+; RV32I-NEXT: lw a4, 24(sp)
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: beqz a0, .LBB7_7
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a5, s1, .LBB7_4
@@ -1254,21 +1312,21 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .LBB7_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i64:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: .cfi_def_cfa_offset 32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -48
+; RV32IA-NEXT: .cfi_def_cfa_offset 48
+; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32IA-NEXT: .cfi_offset ra, -4
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
@@ -1281,16 +1339,21 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: sw a4, 24(sp)
+; RV32IA-NEXT: sw a5, 28(sp)
+; RV32IA-NEXT: sw a3, 20(sp)
+; RV32IA-NEXT: sw a2, 16(sp)
+; RV32IA-NEXT: addi a1, sp, 24
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB7_7
+; RV32IA-NEXT: lw a5, 28(sp)
+; RV32IA-NEXT: lw a4, 24(sp)
+; RV32IA-NEXT: andi a0, a0, 255
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: beqz a0, .LBB7_7
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a5, s1, .LBB7_4
@@ -1317,20 +1380,20 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 48
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: .cfi_def_cfa_offset 32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
@@ -1340,14 +1403,16 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV64I-NEXT: j .LBB7_2
; RV64I-NEXT: .LBB7_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: sd a3, 16(sp)
+; RV64I-NEXT: sd a2, 8(sp)
+; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 0(sp)
-; RV64I-NEXT: bnez a0, .LBB7_4
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB7_4
; RV64I-NEXT: .LBB7_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: seqz a0, a3
@@ -1361,10 +1426,10 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV64I-NEXT: j .LBB7_1
; RV64I-NEXT: .LBB7_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i64:
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index 35900f8a0717aa..b3931315a639e0 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -184,12 +184,15 @@ define i8 @cmpxchg8(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: sb zero, 11(sp)
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: sb a1, 10(sp)
; RV32-NO-ATOMIC-NEXT: addi a1, sp, 11
; RV32-NO-ATOMIC-NEXT: li a2, 1
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_1
; RV32-NO-ATOMIC-NEXT: lbu a0, 11(sp)
+; RV32-NO-ATOMIC-NEXT: sb a0, 9(sp)
; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
; RV32-NO-ATOMIC-NEXT: ret
@@ -221,12 +224,15 @@ define i8 @cmpxchg8(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: sb zero, 7(sp)
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: sb a1, 6(sp)
; RV64-NO-ATOMIC-NEXT: addi a1, sp, 7
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_1
; RV64-NO-ATOMIC-NEXT: lbu a0, 7(sp)
+; RV64-NO-ATOMIC-NEXT: sb a0, 5(sp)
; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
; RV64-NO-ATOMIC-NEXT: ret
@@ -433,12 +439,15 @@ define i16 @cmpxchg16(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: sh zero, 10(sp)
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: sh a1, 8(sp)
; RV32-NO-ATOMIC-NEXT: addi a1, sp, 10
; RV32-NO-ATOMIC-NEXT: li a2, 1
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_2
-; RV32-NO-ATOMIC-NEXT: lh a0, 10(sp)
+; RV32-NO-ATOMIC-NEXT: lhu a0, 10(sp)
+; RV32-NO-ATOMIC-NEXT: sh a0, 6(sp)
; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
; RV32-NO-ATOMIC-NEXT: ret
@@ -470,12 +479,15 @@ define i16 @cmpxchg16(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: sh zero, 6(sp)
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: sh a1, 4(sp)
; RV64-NO-ATOMIC-NEXT: addi a1, sp, 6
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_2
-; RV64-NO-ATOMIC-NEXT: lh a0, 6(sp)
+; RV64-NO-ATOMIC-NEXT: lhu a0, 6(sp)
+; RV64-NO-ATOMIC-NEXT: sh a0, 2(sp)
; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
; RV64-NO-ATOMIC-NEXT: ret
@@ -1353,22 +1365,25 @@ define i32 @rmw32_xor_seq_cst(ptr %p) nounwind {
define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_max_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
; RV32-NO-ATOMIC-NEXT: j .LBB23_2
; RV32-NO-ATOMIC-NEXT: .LBB23_1: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
-; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp)
-; RV32-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV32-NO-ATOMIC-NEXT: sw a1, 20(sp)
+; RV32-NO-ATOMIC-NEXT: sw a2, 16(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw a1, 4(sp)
-; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB23_4
+; RV32-NO-ATOMIC-NEXT: lw a1, 20(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB23_4
; RV32-NO-ATOMIC-NEXT: .LBB23_2: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NO-ATOMIC-NEXT: mv a2, a1
@@ -1379,9 +1394,9 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: j .LBB23_1
; RV32-NO-ATOMIC-NEXT: .LBB23_4: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, a1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_max_seq_cst:
@@ -1415,18 +1430,22 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: .LBB23_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: sw a2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: sext.w a2, a2
; RV64-NO-ATOMIC-NEXT: addi a1, sp, 12
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw a1, 12(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB23_4
+; RV64-NO-ATOMIC-NEXT: lwu a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB23_4
; RV64-NO-ATOMIC-NEXT: .LBB23_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64-NO-ATOMIC-NEXT: li a0, 1
+; RV64-NO-ATOMIC-NEXT: sext.w a0, a1
+; RV64-NO-ATOMIC-NEXT: li a3, 1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
-; RV64-NO-ATOMIC-NEXT: blt a0, a1, .LBB23_1
+; RV64-NO-ATOMIC-NEXT: blt a3, a0, .LBB23_1
; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1
; RV64-NO-ATOMIC-NEXT: li a2, 1
@@ -1464,24 +1483,27 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_min_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
; RV32-NO-ATOMIC-NEXT: li s1, 2
; RV32-NO-ATOMIC-NEXT: j .LBB24_2
; RV32-NO-ATOMIC-NEXT: .LBB24_1: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
-; RV32-NO-ATOMIC-NEXT: sw a1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: sw a1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: sw a2, 12(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw a1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB24_4
+; RV32-NO-ATOMIC-NEXT: lw a1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw a1, 8(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB24_4
; RV32-NO-ATOMIC-NEXT: .LBB24_2: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NO-ATOMIC-NEXT: mv a2, a1
@@ -1492,10 +1514,10 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: j .LBB24_1
; RV32-NO-ATOMIC-NEXT: .LBB24_4: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, a1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_min_seq_cst:
@@ -1520,38 +1542,42 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: rmw32_min_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 2
; RV64-NO-ATOMIC-NEXT: j .LBB24_2
; RV64-NO-ATOMIC-NEXT: .LBB24_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
-; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: sw a1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sext.w a2, a2
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw a1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB24_4
+; RV64-NO-ATOMIC-NEXT: lwu a1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB24_4
; RV64-NO-ATOMIC-NEXT: .LBB24_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: sext.w a0, a1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
-; RV64-NO-ATOMIC-NEXT: blt a1, s1, .LBB24_1
+; RV64-NO-ATOMIC-NEXT: blt a0, s1, .LBB24_1
; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: j .LBB24_1
; RV64-NO-ATOMIC-NEXT: .LBB24_4: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, a1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw32_min_seq_cst:
@@ -1580,28 +1606,31 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_umax_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
; RV32-NO-ATOMIC-NEXT: .LBB25_1: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NO-ATOMIC-NEXT: seqz a2, a1
; RV32-NO-ATOMIC-NEXT: add a2, a1, a2
-; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp)
-; RV32-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV32-NO-ATOMIC-NEXT: sw a1, 20(sp)
+; RV32-NO-ATOMIC-NEXT: sw a2, 16(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw a1, 4(sp)
-; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB25_1
+; RV32-NO-ATOMIC-NEXT: lw a1, 20(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB25_1
; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, a1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_umax_seq_cst:
@@ -1635,18 +1664,22 @@ define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: .LBB25_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1
; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: sw a2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: sext.w a2, a2
; RV64-NO-ATOMIC-NEXT: addi a1, sp, 12
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw a1, 12(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB25_4
+; RV64-NO-ATOMIC-NEXT: lwu a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB25_4
; RV64-NO-ATOMIC-NEXT: .LBB25_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64-NO-ATOMIC-NEXT: li a0, 1
+; RV64-NO-ATOMIC-NEXT: sext.w a0, a1
+; RV64-NO-ATOMIC-NEXT: li a3, 1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
-; RV64-NO-ATOMIC-NEXT: bltu a0, a1, .LBB25_1
+; RV64-NO-ATOMIC-NEXT: bltu a3, a0, .LBB25_1
; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1
; RV64-NO-ATOMIC-NEXT: li a2, 1
@@ -1684,24 +1717,27 @@ define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_umin_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0)
; RV32-NO-ATOMIC-NEXT: li s1, 2
; RV32-NO-ATOMIC-NEXT: j .LBB26_2
; RV32-NO-ATOMIC-NEXT: .LBB26_1: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
-; RV32-NO-ATOMIC-NEXT: sw a1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: sw a1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: sw a2, 12(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw a1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB26_4
+; RV32-NO-ATOMIC-NEXT: lw a1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw a1, 8(sp)
+; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB26_4
; RV32-NO-ATOMIC-NEXT: .LBB26_2: # %atomicrmw.start
; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NO-ATOMIC-NEXT: mv a2, a1
@@ -1712,10 +1748,10 @@ define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: j .LBB26_1
; RV32-NO-ATOMIC-NEXT: .LBB26_4: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, a1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_umin_seq_cst:
@@ -1740,38 +1776,42 @@ define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: rmw32_umin_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 2
; RV64-NO-ATOMIC-NEXT: j .LBB26_2
; RV64-NO-ATOMIC-NEXT: .LBB26_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
-; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: sw a1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sext.w a2, a2
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw a1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB26_4
+; RV64-NO-ATOMIC-NEXT: lwu a1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB26_4
; RV64-NO-ATOMIC-NEXT: .LBB26_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NO-ATOMIC-NEXT: sext.w a0, a1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
-; RV64-NO-ATOMIC-NEXT: bltu a1, s1, .LBB26_1
+; RV64-NO-ATOMIC-NEXT: bltu a0, s1, .LBB26_1
; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: j .LBB26_1
; RV64-NO-ATOMIC-NEXT: .LBB26_4: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, a1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw32_umin_seq_cst:
@@ -1866,10 +1906,10 @@ define i32 @rmw32_xchg_seq_cst(ptr %p) nounwind {
define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_fadd_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV32-NO-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
@@ -1878,20 +1918,23 @@ define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: mv a0, s1
; RV32-NO-ATOMIC-NEXT: call __addsf3
; RV32-NO-ATOMIC-NEXT: mv a2, a0
-; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: sw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: sw a0, 12(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB28_1
+; RV32-NO-ATOMIC-NEXT: lw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw s1, 8(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB28_1
; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, s1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_fadd_seq_cst:
@@ -1946,10 +1989,10 @@ define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: rmw32_fadd_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV64-NO-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
@@ -1957,21 +2000,23 @@ define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: lui a1, 260096
; RV64-NO-ATOMIC-NEXT: mv a0, s1
; RV64-NO-ATOMIC-NEXT: call __addsf3
-; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: sw a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: lw a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB28_1
+; RV64-NO-ATOMIC-NEXT: lwu s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB28_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw32_fadd_seq_cst:
@@ -2040,10 +2085,10 @@ define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_fsub_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV32-NO-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
@@ -2052,20 +2097,23 @@ define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: mv a0, s1
; RV32-NO-ATOMIC-NEXT: call __addsf3
; RV32-NO-ATOMIC-NEXT: mv a2, a0
-; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: sw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: sw a0, 12(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB29_1
+; RV32-NO-ATOMIC-NEXT: lw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw s1, 8(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB29_1
; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, s1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_fsub_seq_cst:
@@ -2120,10 +2168,10 @@ define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: rmw32_fsub_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV64-NO-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
@@ -2131,21 +2179,23 @@ define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: lui a1, 784384
; RV64-NO-ATOMIC-NEXT: mv a0, s1
; RV64-NO-ATOMIC-NEXT: call __addsf3
-; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: sw a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: lw a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB29_1
+; RV64-NO-ATOMIC-NEXT: lwu s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB29_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw32_fsub_seq_cst:
@@ -2214,10 +2264,10 @@ define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_fmin_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV32-NO-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
@@ -2226,20 +2276,23 @@ define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: mv a0, s1
; RV32-NO-ATOMIC-NEXT: call fminf
; RV32-NO-ATOMIC-NEXT: mv a2, a0
-; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: sw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: sw a0, 12(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB30_1
+; RV32-NO-ATOMIC-NEXT: lw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw s1, 8(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB30_1
; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, s1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_fmin_seq_cst:
@@ -2294,10 +2347,10 @@ define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: rmw32_fmin_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV64-NO-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
@@ -2305,21 +2358,23 @@ define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: lui a1, 260096
; RV64-NO-ATOMIC-NEXT: mv a0, s1
; RV64-NO-ATOMIC-NEXT: call fminf
-; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: sw a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: lw a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB30_1
+; RV64-NO-ATOMIC-NEXT: lwu s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB30_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw32_fmin_seq_cst:
@@ -2388,10 +2443,10 @@ define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-LABEL: rmw32_fmax_seq_cst:
; RV32-NO-ATOMIC: # %bb.0:
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV32-NO-ATOMIC-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NO-ATOMIC-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: mv s0, a0
; RV32-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV32-NO-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
@@ -2400,20 +2455,23 @@ define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: mv a0, s1
; RV32-NO-ATOMIC-NEXT: call fmaxf
; RV32-NO-ATOMIC-NEXT: mv a2, a0
-; RV32-NO-ATOMIC-NEXT: sw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: mv a1, sp
+; RV32-NO-ATOMIC-NEXT: sw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: sw a0, 12(sp)
+; RV32-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: mv a0, s0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV32-NO-ATOMIC-NEXT: lw s1, 0(sp)
-; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB31_1
+; RV32-NO-ATOMIC-NEXT: lw s1, 16(sp)
+; RV32-NO-ATOMIC-NEXT: andi a0, a0, 255
+; RV32-NO-ATOMIC-NEXT: sw s1, 8(sp)
+; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB31_1
; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NO-ATOMIC-NEXT: mv a0, s1
-; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV32-NO-ATOMIC-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV32-NO-ATOMIC-NEXT: ret
;
; RV32-ATOMIC-LABEL: rmw32_fmax_seq_cst:
@@ -2468,10 +2526,10 @@ define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: rmw32_fmax_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: lw s1, 0(a0)
; RV64-NO-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
@@ -2479,21 +2537,23 @@ define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: lui a1, 260096
; RV64-NO-ATOMIC-NEXT: mv a0, s1
; RV64-NO-ATOMIC-NEXT: call fmaxf
-; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: sw a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: lw a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw s1, 4(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB31_1
+; RV64-NO-ATOMIC-NEXT: lwu s1, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw s1, 12(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB31_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw32_fmax_seq_cst:
@@ -2565,12 +2625,15 @@ define i32 @cmpxchg32_monotonic(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: sw zero, 8(sp)
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp)
; RV32-NO-ATOMIC-NEXT: addi a1, sp, 8
; RV32-NO-ATOMIC-NEXT: li a2, 1
; RV32-NO-ATOMIC-NEXT: li a3, 0
; RV32-NO-ATOMIC-NEXT: li a4, 0
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
; RV32-NO-ATOMIC-NEXT: lw a0, 8(sp)
+; RV32-NO-ATOMIC-NEXT: sw a0, 0(sp)
; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
; RV32-NO-ATOMIC-NEXT: ret
@@ -2599,17 +2662,20 @@ define i32 @cmpxchg32_monotonic(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: cmpxchg32_monotonic:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sw zero, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sw zero, 20(sp)
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: sw a1, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: li a3, 0
; RV64-NO-ATOMIC-NEXT: li a4, 0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw a0, 4(sp)
-; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: lwu a0, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw a0, 12(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: cmpxchg32_monotonic:
@@ -2644,12 +2710,15 @@ define i32 @cmpxchg32_seq_cst(ptr %p) nounwind {
; RV32-NO-ATOMIC-NEXT: addi sp, sp, -16
; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NO-ATOMIC-NEXT: sw zero, 8(sp)
+; RV32-NO-ATOMIC-NEXT: li a1, 1
+; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp)
; RV32-NO-ATOMIC-NEXT: addi a1, sp, 8
; RV32-NO-ATOMIC-NEXT: li a2, 1
; RV32-NO-ATOMIC-NEXT: li a3, 5
; RV32-NO-ATOMIC-NEXT: li a4, 5
; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
; RV32-NO-ATOMIC-NEXT: lw a0, 8(sp)
+; RV32-NO-ATOMIC-NEXT: sw a0, 0(sp)
; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16
; RV32-NO-ATOMIC-NEXT: ret
@@ -2678,17 +2747,20 @@ define i32 @cmpxchg32_seq_cst(ptr %p) nounwind {
;
; RV64-NO-ATOMIC-LABEL: cmpxchg32_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sw zero, 4(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sw zero, 20(sp)
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: sw a1, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 20
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4
-; RV64-NO-ATOMIC-NEXT: lw a0, 4(sp)
-; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: lwu a0, 20(sp)
+; RV64-NO-ATOMIC-NEXT: sw a0, 12(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: cmpxchg32_seq_cst:
@@ -3344,9 +3416,9 @@ define i64 @rmw64_xor_seq_cst(ptr %p) nounwind {
define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_max_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
@@ -3355,16 +3427,21 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: neg a3, a0
; RV32-NEXT: and a3, a3, a1
-; RV32-NEXT: sw a4, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: sw a4, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB49_6
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a4, 16(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: beqz a0, .LBB49_6
; RV32-NEXT: .LBB49_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB49_4
@@ -3385,29 +3462,31 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: j .LBB49_1
; RV32-NEXT: .LBB49_6: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_max_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
; RV64-NO-ATOMIC-NEXT: j .LBB49_2
; RV64-NO-ATOMIC-NEXT: .LBB49_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB49_2 Depth=1
-; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: sd a1, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 24
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld a1, 8(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB49_4
+; RV64-NO-ATOMIC-NEXT: ld a1, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB49_4
; RV64-NO-ATOMIC-NEXT: .LBB49_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
@@ -3418,9 +3497,9 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: j .LBB49_1
; RV64-NO-ATOMIC-NEXT: .LBB49_4: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, a1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_max_seq_cst:
@@ -3449,9 +3528,9 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_min_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
@@ -3460,16 +3539,21 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: neg a3, a0
; RV32-NEXT: and a3, a3, a1
-; RV32-NEXT: sw a4, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: sw a4, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB50_6
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a4, 16(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: beqz a0, .LBB50_6
; RV32-NEXT: .LBB50_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB50_4
@@ -3489,31 +3573,33 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-NEXT: j .LBB50_1
; RV32-NEXT: .LBB50_6: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_min_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 2
; RV64-NO-ATOMIC-NEXT: j .LBB50_2
; RV64-NO-ATOMIC-NEXT: .LBB50_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB50_2 Depth=1
-; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp)
-; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: sd a1, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sd a2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld a1, 0(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB50_4
+; RV64-NO-ATOMIC-NEXT: ld a1, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB50_4
; RV64-NO-ATOMIC-NEXT: .LBB50_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
@@ -3524,10 +3610,10 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: j .LBB50_1
; RV64-NO-ATOMIC-NEXT: .LBB50_4: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, a1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_min_seq_cst:
@@ -3556,9 +3642,9 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_umax_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
@@ -3567,16 +3653,21 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: neg a3, a0
; RV32-NEXT: and a3, a3, a1
-; RV32-NEXT: sw a4, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: sw a4, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB51_4
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a4, 16(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: beqz a0, .LBB51_4
; RV32-NEXT: .LBB51_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: snez a0, a1
@@ -3591,35 +3682,37 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: j .LBB51_1
; RV32-NEXT: .LBB51_4: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_umax_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
; RV64-NO-ATOMIC-NEXT: .LBB51_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NO-ATOMIC-NEXT: seqz a2, a1
; RV64-NO-ATOMIC-NEXT: add a2, a1, a2
-; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: sd a1, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a2, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 24
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld a1, 8(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB51_1
+; RV64-NO-ATOMIC-NEXT: ld a1, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB51_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, a1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_umax_seq_cst:
@@ -3648,9 +3741,9 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_umin_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
@@ -3659,16 +3752,21 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
; RV32-NEXT: neg a3, a0
; RV32-NEXT: and a3, a3, a1
-; RV32-NEXT: sw a4, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: sw a4, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB52_4
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a4, 16(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: beqz a0, .LBB52_4
; RV32-NEXT: .LBB52_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: sltiu a0, a4, 2
@@ -3682,31 +3780,33 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: j .LBB52_1
; RV32-NEXT: .LBB52_4: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_umin_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
-; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
+; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 2
; RV64-NO-ATOMIC-NEXT: j .LBB52_2
; RV64-NO-ATOMIC-NEXT: .LBB52_1: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB52_2 Depth=1
-; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp)
-; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: sd a1, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sd a2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld a1, 0(sp)
-; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB52_4
+; RV64-NO-ATOMIC-NEXT: ld a1, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp)
+; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB52_4
; RV64-NO-ATOMIC-NEXT: .LBB52_2: # %atomicrmw.start
; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NO-ATOMIC-NEXT: mv a2, a1
@@ -3717,10 +3817,10 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: j .LBB52_1
; RV64-NO-ATOMIC-NEXT: .LBB52_4: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, a1
-; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
+; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_umin_seq_cst:
@@ -3796,11 +3896,11 @@ define i64 @rmw64_xchg_seq_cst(ptr %p) nounwind {
define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_fadd_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw s1, 4(a0)
; RV32-NEXT: lw s2, 0(a0)
@@ -3813,33 +3913,38 @@ define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call __adddf3
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw s2, 24(sp)
+; RV32-NEXT: sw s1, 28(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
-; RV32-NEXT: beqz a0, .LBB54_1
+; RV32-NEXT: lw s1, 28(sp)
+; RV32-NEXT: lw s2, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: bnez a0, .LBB54_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s1
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_fadd_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
-; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -64
+; RV64-NO-ATOMIC-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 1023
@@ -3850,21 +3955,23 @@ define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: mv a1, s1
; RV64-NO-ATOMIC-NEXT: call __adddf3
; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: sd s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 24
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB54_1
+; RV64-NO-ATOMIC-NEXT: ld s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB54_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s2
-; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 64
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_fadd_seq_cst:
@@ -3931,11 +4038,11 @@ define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_fsub_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw s1, 4(a0)
; RV32-NEXT: lw s2, 0(a0)
@@ -3948,33 +4055,38 @@ define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call __adddf3
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw s2, 24(sp)
+; RV32-NEXT: sw s1, 28(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
-; RV32-NEXT: beqz a0, .LBB55_1
+; RV32-NEXT: lw s1, 28(sp)
+; RV32-NEXT: lw s2, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: bnez a0, .LBB55_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s1
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_fsub_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
-; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -64
+; RV64-NO-ATOMIC-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, -1025
@@ -3985,21 +4097,23 @@ define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: mv a1, s1
; RV64-NO-ATOMIC-NEXT: call __adddf3
; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: sd s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 24
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB55_1
+; RV64-NO-ATOMIC-NEXT: ld s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB55_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s2
-; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 64
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_fsub_seq_cst:
@@ -4066,11 +4180,11 @@ define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_fmin_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw s1, 4(a0)
; RV32-NEXT: lw s2, 0(a0)
@@ -4083,33 +4197,38 @@ define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call fmin
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw s2, 24(sp)
+; RV32-NEXT: sw s1, 28(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
-; RV32-NEXT: beqz a0, .LBB56_1
+; RV32-NEXT: lw s1, 28(sp)
+; RV32-NEXT: lw s2, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: bnez a0, .LBB56_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s1
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_fmin_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
-; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -64
+; RV64-NO-ATOMIC-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 1023
@@ -4120,21 +4239,23 @@ define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: mv a1, s1
; RV64-NO-ATOMIC-NEXT: call fmin
; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: sd s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 24
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB56_1
+; RV64-NO-ATOMIC-NEXT: ld s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB56_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s2
-; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 64
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_fmin_seq_cst:
@@ -4201,11 +4322,11 @@ define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
; RV32-LABEL: rmw64_fmax_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw s1, 4(a0)
; RV32-NEXT: lw s2, 0(a0)
@@ -4218,33 +4339,38 @@ define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call fmax
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: sw s2, 24(sp)
+; RV32-NEXT: sw s1, 28(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
-; RV32-NEXT: beqz a0, .LBB57_1
+; RV32-NEXT: lw s1, 28(sp)
+; RV32-NEXT: lw s2, 24(sp)
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s2, 8(sp)
+; RV32-NEXT: bnez a0, .LBB57_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s1
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: rmw64_fmax_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -48
-; RV64-NO-ATOMIC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -64
+; RV64-NO-ATOMIC-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64-NO-ATOMIC-NEXT: mv s0, a0
; RV64-NO-ATOMIC-NEXT: ld s2, 0(a0)
; RV64-NO-ATOMIC-NEXT: li s1, 1023
@@ -4255,21 +4381,23 @@ define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
; RV64-NO-ATOMIC-NEXT: mv a1, s1
; RV64-NO-ATOMIC-NEXT: call fmax
; RV64-NO-ATOMIC-NEXT: mv a2, a0
-; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8
+; RV64-NO-ATOMIC-NEXT: sd s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 24
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: mv a0, s0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld s2, 8(sp)
-; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB57_1
+; RV64-NO-ATOMIC-NEXT: ld s2, 24(sp)
+; RV64-NO-ATOMIC-NEXT: sd s2, 8(sp)
+; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB57_1
; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end
; RV64-NO-ATOMIC-NEXT: mv a0, s2
-; RV64-NO-ATOMIC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 48
+; RV64-NO-ATOMIC-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 64
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: rmw64_fmax_seq_cst:
@@ -4336,35 +4464,43 @@ define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
; RV32-LABEL: cmpxchg64_monotonic:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw zero, 4(sp)
-; RV32-NEXT: sw zero, 0(sp)
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw zero, 20(sp)
+; RV32-NEXT: sw zero, 16(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: li a2, 1
; RV32-NEXT: li a3, 0
; RV32-NEXT: li a4, 0
; RV32-NEXT: li a5, 0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: lw a0, 0(sp)
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: cmpxchg64_monotonic:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd zero, 0(sp)
-; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd zero, 16(sp)
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: li a3, 0
; RV64-NO-ATOMIC-NEXT: li a4, 0
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld a0, 0(sp)
-; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ld a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sd a0, 0(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: cmpxchg64_monotonic:
@@ -4396,35 +4532,43 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
define i64 @cmpxchg64_seq_cst(ptr %p) nounwind {
; RV32-LABEL: cmpxchg64_seq_cst:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw zero, 4(sp)
-; RV32-NEXT: sw zero, 0(sp)
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw zero, 20(sp)
+; RV32-NEXT: sw zero, 16(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: li a2, 1
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: lw a0, 0(sp)
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lw a1, 20(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-NO-ATOMIC-LABEL: cmpxchg64_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, -16
-; RV64-NO-ATOMIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NO-ATOMIC-NEXT: sd zero, 0(sp)
-; RV64-NO-ATOMIC-NEXT: mv a1, sp
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, -32
+; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NO-ATOMIC-NEXT: sd zero, 16(sp)
+; RV64-NO-ATOMIC-NEXT: li a1, 1
+; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp)
+; RV64-NO-ATOMIC-NEXT: addi a1, sp, 16
; RV64-NO-ATOMIC-NEXT: li a2, 1
; RV64-NO-ATOMIC-NEXT: li a3, 5
; RV64-NO-ATOMIC-NEXT: li a4, 5
; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8
-; RV64-NO-ATOMIC-NEXT: ld a0, 0(sp)
-; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16
+; RV64-NO-ATOMIC-NEXT: ld a0, 16(sp)
+; RV64-NO-ATOMIC-NEXT: sd a0, 0(sp)
+; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32
; RV64-NO-ATOMIC-NEXT: ret
;
; RV64-ATOMIC-LABEL: cmpxchg64_seq_cst:
@@ -4526,10 +4670,10 @@ define void @store128(ptr %p) nounwind {
define i128 @rmw128(ptr %p) nounwind {
; RV32-LABEL: rmw128:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a1
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: lw a2, 8(s0)
@@ -4546,35 +4690,40 @@ define i128 @rmw128(ptr %p) nounwind {
; RV32-NEXT: add a6, a2, a6
; RV32-NEXT: sltu a7, a6, a2
; RV32-NEXT: add a7, a1, a7
-; RV32-NEXT: sw a4, 16(sp)
-; RV32-NEXT: sw a3, 20(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a5, 4(sp)
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a6, 8(sp)
-; RV32-NEXT: sw a7, 12(sp)
+; RV32-NEXT: sw a4, 32(sp)
+; RV32-NEXT: sw a3, 36(sp)
+; RV32-NEXT: sw a2, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
+; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: sw a5, 20(sp)
+; RV32-NEXT: sw a6, 24(sp)
+; RV32-NEXT: sw a7, 28(sp)
; RV32-NEXT: li a0, 16
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: mv a3, sp
+; RV32-NEXT: addi a2, sp, 32
+; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a1, s0
; RV32-NEXT: call __atomic_compare_exchange
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: lw a2, 24(sp)
-; RV32-NEXT: lw a3, 20(sp)
-; RV32-NEXT: lw a4, 16(sp)
-; RV32-NEXT: beqz a0, .LBB62_1
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: lw a1, 44(sp)
+; RV32-NEXT: lw a2, 40(sp)
+; RV32-NEXT: lw a3, 36(sp)
+; RV32-NEXT: lw a4, 32(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 4(sp)
+; RV32-NEXT: sw a4, 0(sp)
+; RV32-NEXT: bnez a0, .LBB62_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
; RV32-NEXT: sw a4, 0(s1)
; RV32-NEXT: sw a3, 4(s1)
; RV32-NEXT: sw a2, 8(s1)
; RV32-NEXT: sw a1, 12(s1)
-; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: ret
;
; RV64-LABEL: rmw128:
@@ -4595,54 +4744,63 @@ define i128 @rmw128(ptr %p) nounwind {
define i128 @cmpxchg128(ptr %p) nounwind {
; RV32-LABEL: cmpxchg128:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
+; RV32-NEXT: sw zero, 52(sp)
+; RV32-NEXT: sw zero, 48(sp)
+; RV32-NEXT: sw zero, 44(sp)
+; RV32-NEXT: sw zero, 40(sp)
; RV32-NEXT: sw zero, 36(sp)
; RV32-NEXT: sw zero, 32(sp)
; RV32-NEXT: sw zero, 28(sp)
-; RV32-NEXT: sw zero, 24(sp)
-; RV32-NEXT: sw zero, 20(sp)
-; RV32-NEXT: sw zero, 16(sp)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 1
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a0, 24(sp)
; RV32-NEXT: li a0, 16
-; RV32-NEXT: addi a2, sp, 24
-; RV32-NEXT: addi a3, sp, 8
+; RV32-NEXT: addi a2, sp, 40
+; RV32-NEXT: addi a3, sp, 24
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: call __atomic_compare_exchange
-; RV32-NEXT: lw a0, 36(sp)
-; RV32-NEXT: lw a1, 32(sp)
-; RV32-NEXT: lw a2, 28(sp)
-; RV32-NEXT: lw a3, 24(sp)
-; RV32-NEXT: sw a0, 12(s0)
-; RV32-NEXT: sw a1, 8(s0)
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: lw a0, 48(sp)
+; RV32-NEXT: lw a1, 44(sp)
+; RV32-NEXT: lw a2, 40(sp)
+; RV32-NEXT: lw a3, 52(sp)
+; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a0, 8(s0)
+; RV32-NEXT: sw a3, 12(s0)
+; RV32-NEXT: sw a2, 0(s0)
+; RV32-NEXT: sw a1, 4(s0)
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: ret
;
; RV64-LABEL: cmpxchg128:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd zero, 8(sp)
-; RV64-NEXT: sd zero, 0(sp)
-; RV64-NEXT: mv a1, sp
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd zero, 40(sp)
+; RV64-NEXT: sd zero, 32(sp)
+; RV64-NEXT: sd zero, 24(sp)
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: sd a1, 16(sp)
+; RV64-NEXT: addi a1, sp, 32
; RV64-NEXT: li a2, 1
; RV64-NEXT: li a4, 5
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __atomic_compare_exchange_16
-; RV64-NEXT: ld a1, 8(sp)
-; RV64-NEXT: ld a0, 0(sp)
-; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ld a1, 40(sp)
+; RV64-NEXT: ld a0, 32(sp)
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ret
%res = cmpxchg ptr %p, i128 0, i128 1 seq_cst seq_cst
%res.0 = extractvalue { i128, i1 } %res, 0
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 0f9feeb17716af..e62d4003fd9c92 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -117,7 +117,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: save %sp, -104, %sp
+; CHECK-NEXT: save %sp, -120, %sp
; CHECK-NEXT: .cfi_def_cfa_register %fp
; CHECK-NEXT: .cfi_window_save
; CHECK-NEXT: .cfi_register %o7, %i7
@@ -137,17 +137,20 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: cmp %g2, %i1
; CHECK-NEXT: move %icc, %g4, %i5
; CHECK-NEXT: cmp %i5, 0
-; CHECK-NEXT: movne %icc, 0, %o2
; CHECK-NEXT: movne %icc, 0, %o3
+; CHECK-NEXT: movne %icc, 0, %o2
; CHECK-NEXT: std %g2, [%fp+-8]
+; CHECK-NEXT: std %o2, [%fp+-16]
; CHECK-NEXT: mov %i0, %o0
; CHECK-NEXT: mov %i3, %o1
; CHECK-NEXT: mov %i4, %o4
; CHECK-NEXT: call __atomic_compare_exchange_8
; CHECK-NEXT: mov %i4, %o5
-; CHECK-NEXT: cmp %o0, 0
-; CHECK-NEXT: be %icc, .LBB3_1
; CHECK-NEXT: ldd [%fp+-8], %g2
+; CHECK-NEXT: and %o0, 255, %i5
+; CHECK-NEXT: cmp %i5, 0
+; CHECK-NEXT: bne %icc, .LBB3_1
+; CHECK-NEXT: std %g2, [%fp+-24]
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: mov %g2, %i0
; CHECK-NEXT: ret
@@ -278,7 +281,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i64:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: save %sp, -104, %sp
+; CHECK-NEXT: save %sp, -120, %sp
; CHECK-NEXT: .cfi_def_cfa_register %fp
; CHECK-NEXT: .cfi_window_save
; CHECK-NEXT: .cfi_register %o7, %i7
@@ -303,17 +306,20 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: move %icc, %l0, %g4
; CHECK-NEXT: or %i5, %g4, %i5
; CHECK-NEXT: cmp %i5, 0
-; CHECK-NEXT: movne %icc, %i1, %o2
; CHECK-NEXT: movne %icc, %i2, %o3
+; CHECK-NEXT: movne %icc, %i1, %o2
; CHECK-NEXT: std %g2, [%fp+-8]
+; CHECK-NEXT: std %o2, [%fp+-16]
; CHECK-NEXT: mov %i0, %o0
; CHECK-NEXT: mov %i3, %o1
; CHECK-NEXT: mov %i4, %o4
; CHECK-NEXT: call __atomic_compare_exchange_8
; CHECK-NEXT: mov %i4, %o5
-; CHECK-NEXT: cmp %o0, 0
-; CHECK-NEXT: be %icc, .LBB7_1
; CHECK-NEXT: ldd [%fp+-8], %g2
+; CHECK-NEXT: and %o0, 255, %i5
+; CHECK-NEXT: cmp %i5, 0
+; CHECK-NEXT: bne %icc, .LBB7_1
+; CHECK-NEXT: std %g2, [%fp+-24]
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: mov %g2, %i0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index d5c46485068a64..10b93b86a56c57 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -176,66 +176,68 @@ define i128 @or128(ptr %p) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-GENERIC-LABEL: or128:
-; X86-GENERIC: # %bb.0:
-; X86-GENERIC-NEXT: pushl %ebp
-; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
-; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
-; X86-GENERIC-NEXT: movl %esp, %ebp
-; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
-; X86-GENERIC-NEXT: pushl %ebx
-; X86-GENERIC-NEXT: pushl %edi
-; X86-GENERIC-NEXT: pushl %esi
-; X86-GENERIC-NEXT: andl $-16, %esp
-; X86-GENERIC-NEXT: subl $48, %esp
-; X86-GENERIC-NEXT: .cfi_offset %esi, -20
-; X86-GENERIC-NEXT: .cfi_offset %edi, -16
-; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
-; X86-GENERIC-NEXT: movl 12(%ebp), %edi
-; X86-GENERIC-NEXT: movl 12(%edi), %ecx
-; X86-GENERIC-NEXT: movl 8(%edi), %edx
-; X86-GENERIC-NEXT: movl (%edi), %ebx
-; X86-GENERIC-NEXT: movl 4(%edi), %esi
-; X86-GENERIC-NEXT: .p2align 4, 0x90
-; X86-GENERIC-NEXT: .LBB4_1: # %atomicrmw.start
-; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-GENERIC-NEXT: movl %ebx, (%esp)
-; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: pushl $0
-; X86-GENERIC-NEXT: pushl $0
-; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT: pushl %eax
-; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT: pushl %eax
-; X86-GENERIC-NEXT: pushl %edi
-; X86-GENERIC-NEXT: pushl $16
-; X86-GENERIC-NEXT: calll __atomic_compare_exchange at PLT
-; X86-GENERIC-NEXT: addl $24, %esp
-; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-GENERIC-NEXT: movl (%esp), %ebx
-; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-GENERIC-NEXT: testb %al, %al
-; X86-GENERIC-NEXT: je .LBB4_1
-; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
-; X86-GENERIC-NEXT: movl 8(%ebp), %eax
-; X86-GENERIC-NEXT: movl %ebx, (%eax)
-; X86-GENERIC-NEXT: movl %esi, 4(%eax)
-; X86-GENERIC-NEXT: movl %edx, 8(%eax)
-; X86-GENERIC-NEXT: movl %ecx, 12(%eax)
-; X86-GENERIC-NEXT: leal -12(%ebp), %esp
-; X86-GENERIC-NEXT: popl %esi
-; X86-GENERIC-NEXT: popl %edi
-; X86-GENERIC-NEXT: popl %ebx
-; X86-GENERIC-NEXT: popl %ebp
-; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
-; X86-GENERIC-NEXT: retl $4
+; X86-SSE2-LABEL: or128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $64, %esp
+; X86-SSE2-NEXT: .cfi_offset %esi, -20
+; X86-SSE2-NEXT: .cfi_offset %edi, -16
+; X86-SSE2-NEXT: .cfi_offset %ebx, -12
+; X86-SSE2-NEXT: movl 12(%ebp), %edi
+; X86-SSE2-NEXT: movl 12(%edi), %ecx
+; X86-SSE2-NEXT: movl 8(%edi), %edx
+; X86-SSE2-NEXT: movl (%edi), %ebx
+; X86-SSE2-NEXT: movl 4(%edi), %esi
+; X86-SSE2-NEXT: .p2align 4, 0x90
+; X86-SSE2-NEXT: .LBB4_1: # %atomicrmw.start
+; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SSE2-NEXT: movl %ebx, (%esp)
+; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl $16
+; X86-SSE2-NEXT: calll __atomic_compare_exchange at PLT
+; X86-SSE2-NEXT: addl $24, %esp
+; X86-SSE2-NEXT: movaps (%esp), %xmm0
+; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT: testb %al, %al
+; X86-SSE2-NEXT: jne .LBB4_1
+; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
+; X86-SSE2-NEXT: movl %ebx, (%eax)
+; X86-SSE2-NEXT: movl %esi, 4(%eax)
+; X86-SSE2-NEXT: movl %edx, 8(%eax)
+; X86-SSE2-NEXT: movl %ecx, 12(%eax)
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebx
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM: # %bb.0:
@@ -264,10 +266,10 @@ define i128 @or128(ptr %p) {
; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -278,12 +280,14 @@ define i128 @or128(ptr %p) {
; X86-ATOM-NEXT: pushl $16
; X86-ATOM-NEXT: calll __atomic_compare_exchange at PLT
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movaps (%esp), %xmm0
+; X86-ATOM-NEXT: testb %al, %al
+; X86-ATOM-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-ATOM-NEXT: testb %al, %al
-; X86-ATOM-NEXT: movl (%esp), %esi
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-ATOM-NEXT: je .LBB4_1
+; X86-ATOM-NEXT: jne .LBB4_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: movl 8(%ebp), %eax
; X86-ATOM-NEXT: movl %esi, (%eax)
@@ -526,61 +530,63 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-GENERIC-LABEL: or128_nouse_seq_cst:
-; X86-GENERIC: # %bb.0:
-; X86-GENERIC-NEXT: pushl %ebp
-; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
-; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
-; X86-GENERIC-NEXT: movl %esp, %ebp
-; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
-; X86-GENERIC-NEXT: pushl %ebx
-; X86-GENERIC-NEXT: pushl %edi
-; X86-GENERIC-NEXT: pushl %esi
-; X86-GENERIC-NEXT: andl $-16, %esp
-; X86-GENERIC-NEXT: subl $48, %esp
-; X86-GENERIC-NEXT: .cfi_offset %esi, -20
-; X86-GENERIC-NEXT: .cfi_offset %edi, -16
-; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
-; X86-GENERIC-NEXT: movl 8(%ebp), %esi
-; X86-GENERIC-NEXT: movl 12(%esi), %ecx
-; X86-GENERIC-NEXT: movl 8(%esi), %edi
-; X86-GENERIC-NEXT: movl (%esi), %edx
-; X86-GENERIC-NEXT: movl 4(%esi), %ebx
-; X86-GENERIC-NEXT: .p2align 4, 0x90
-; X86-GENERIC-NEXT: .LBB12_1: # %atomicrmw.start
-; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-GENERIC-NEXT: movl %edx, (%esp)
-; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-GENERIC-NEXT: pushl $5
-; X86-GENERIC-NEXT: pushl $5
-; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT: pushl %eax
-; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT: pushl %eax
-; X86-GENERIC-NEXT: pushl %esi
-; X86-GENERIC-NEXT: pushl $16
-; X86-GENERIC-NEXT: calll __atomic_compare_exchange at PLT
-; X86-GENERIC-NEXT: addl $24, %esp
-; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-GENERIC-NEXT: movl (%esp), %edx
-; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-GENERIC-NEXT: testb %al, %al
-; X86-GENERIC-NEXT: je .LBB12_1
-; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
-; X86-GENERIC-NEXT: leal -12(%ebp), %esp
-; X86-GENERIC-NEXT: popl %esi
-; X86-GENERIC-NEXT: popl %edi
-; X86-GENERIC-NEXT: popl %ebx
-; X86-GENERIC-NEXT: popl %ebp
-; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
-; X86-GENERIC-NEXT: retl
+; X86-SSE2-LABEL: or128_nouse_seq_cst:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: pushl %ebx
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $64, %esp
+; X86-SSE2-NEXT: .cfi_offset %esi, -20
+; X86-SSE2-NEXT: .cfi_offset %edi, -16
+; X86-SSE2-NEXT: .cfi_offset %ebx, -12
+; X86-SSE2-NEXT: movl 8(%ebp), %esi
+; X86-SSE2-NEXT: movl 12(%esi), %ecx
+; X86-SSE2-NEXT: movl 8(%esi), %edx
+; X86-SSE2-NEXT: movl (%esi), %edi
+; X86-SSE2-NEXT: movl 4(%esi), %ebx
+; X86-SSE2-NEXT: .p2align 4, 0x90
+; X86-SSE2-NEXT: .LBB12_1: # %atomicrmw.start
+; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SSE2-NEXT: movl %edi, (%esp)
+; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: pushl $5
+; X86-SSE2-NEXT: pushl $5
+; X86-SSE2-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: pushl $16
+; X86-SSE2-NEXT: calll __atomic_compare_exchange at PLT
+; X86-SSE2-NEXT: addl $24, %esp
+; X86-SSE2-NEXT: movaps (%esp), %xmm0
+; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE2-NEXT: testb %al, %al
+; X86-SSE2-NEXT: jne .LBB12_1
+; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebx
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM: # %bb.0:
@@ -610,10 +616,10 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: pushl $5
; X86-ATOM-NEXT: pushl $5
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -623,12 +629,14 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X86-ATOM-NEXT: pushl $16
; X86-ATOM-NEXT: calll __atomic_compare_exchange at PLT
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movaps (%esp), %xmm0
; X86-ATOM-NEXT: testb %al, %al
+; X86-ATOM-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-ATOM-NEXT: movl (%esp), %eax
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-ATOM-NEXT: je .LBB12_1
+; X86-ATOM-NEXT: jne .LBB12_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: leal -12(%ebp), %esp
; X86-ATOM-NEXT: popl %esi
diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll
index 930286c8e5fb34..95260bcbc94262 100644
--- a/llvm/test/CodeGen/X86/atomic-xor.ll
+++ b/llvm/test/CodeGen/X86/atomic-xor.ll
@@ -26,7 +26,7 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $48, %esp
+; X86-NEXT: subl $64, %esp
; X86-NEXT: movl 12(%ebp), %edi
; X86-NEXT: movl 12(%edi), %ecx
; X86-NEXT: movl 8(%edi), %edx
@@ -40,10 +40,10 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: addl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -55,11 +55,15 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: calll __atomic_compare_exchange at PLT
; X86-NEXT: addl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl (%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: testb %al, %al
-; X86-NEXT: je .LBB1_1
+; X86-NEXT: jne .LBB1_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ebx, (%eax)
diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll
index 8f4da356e06cbb..59d260bb543dae 100644
--- a/llvm/test/CodeGen/X86/atomic64.ll
+++ b/llvm/test/CodeGen/X86/atomic64.ll
@@ -328,7 +328,7 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $72, %esp
+; I486-NEXT: subl $88, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -359,12 +359,16 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB6_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB6_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -375,12 +379,16 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: je .LBB6_1
+; I486-NEXT: jne .LBB6_1
; I486-NEXT: jmp .LBB6_2
; I486-NEXT: .LBB6_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -420,7 +428,7 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $72, %esp
+; I486-NEXT: subl $88, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -451,12 +459,16 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB7_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB7_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -467,12 +479,16 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: je .LBB7_1
+; I486-NEXT: jne .LBB7_1
; I486-NEXT: jmp .LBB7_2
; I486-NEXT: .LBB7_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -512,7 +528,7 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $72, %esp
+; I486-NEXT: subl $88, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -543,12 +559,16 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB8_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB8_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -559,12 +579,16 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: je .LBB8_1
+; I486-NEXT: jne .LBB8_1
; I486-NEXT: jmp .LBB8_2
; I486-NEXT: .LBB8_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -604,7 +628,7 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $72, %esp
+; I486-NEXT: subl $88, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -635,12 +659,16 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB9_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB9_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -651,12 +679,16 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: je .LBB9_1
+; I486-NEXT: jne .LBB9_1
; I486-NEXT: jmp .LBB9_2
; I486-NEXT: .LBB9_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -682,18 +714,26 @@ define void @atomic_fetch_cmpxchg64() nounwind {
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $32, %esp
+; I486-NEXT: subl $48, %esp
+; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
+; I486-NEXT: movl $1, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
+; I486-NEXT: movl %edx, 12(%eax)
+; I486-NEXT: movl %ecx, 8(%eax)
; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl %ecx, 4(%eax)
; I486-NEXT: movl $2, 20(%eax)
; I486-NEXT: movl $2, 16(%eax)
-; I486-NEXT: movl $0, 12(%eax)
-; I486-NEXT: movl $1, 8(%eax)
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl %ebp, %esp
; I486-NEXT: popl %ebp
; I486-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/cmpxchg8b.ll b/llvm/test/CodeGen/X86/cmpxchg8b.ll
index 10e957015047b8..a598608a7d37db 100644
--- a/llvm/test/CodeGen/X86/cmpxchg8b.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg8b.ll
@@ -33,10 +33,12 @@ define void @t1(ptr nocapture %p) nounwind ssp {
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $8, %esp
+; I486-NEXT: subl $24, %esp
; I486-NEXT: movl 8(%ebp), %eax
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
; I486-NEXT: movl $0, (%esp)
+; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
+; I486-NEXT: movl $1, {{[0-9]+}}(%esp)
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: pushl $5
; I486-NEXT: pushl $5
@@ -46,6 +48,10 @@ define void @t1(ptr nocapture %p) nounwind ssp {
; I486-NEXT: pushl %eax
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: addl $24, %esp
+; I486-NEXT: movl (%esp), %eax
+; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %ebp, %esp
; I486-NEXT: popl %ebp
; I486-NEXT: retl
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
index af6b7e0addfb11..d891d530ae7864 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
@@ -839,22 +839,17 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -870,22 +865,17 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -901,22 +891,17 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -932,22 +917,17 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -963,22 +943,17 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -994,22 +969,17 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1117,22 +1087,17 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1148,22 +1113,17 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1179,22 +1139,17 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1210,22 +1165,17 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1333,22 +1283,17 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1364,22 +1309,17 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1395,22 +1335,17 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1426,22 +1361,17 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1549,22 +1479,17 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1580,22 +1505,17 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1611,22 +1531,17 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1642,22 +1557,17 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
index 69d65e6f1f3799..8e3b37d5e3aa0c 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
@@ -839,22 +839,17 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -870,22 +865,17 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -901,22 +891,17 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -932,22 +917,17 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -963,22 +943,17 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -994,22 +969,17 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1117,22 +1087,17 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1148,22 +1113,17 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1179,22 +1139,17 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1210,22 +1165,17 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
-; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1333,22 +1283,17 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1364,22 +1309,17 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1395,22 +1335,17 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1426,22 +1361,17 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1549,22 +1479,17 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1580,22 +1505,17 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1611,22 +1531,17 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1642,22 +1557,17 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
+; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
-; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
-; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
index e70ab325dd8f31..273dab0d13ca99 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
@@ -15,23 +15,21 @@ define fp128 @test_atomicrmw_xchg_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fadd_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd fp128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast fp128 [[NEW]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP3]], ptr addrspace(5) [[TMP1]], i128 [[TMP4]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP8]], 0
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
+; CHECK-NEXT: store fp128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } [[TMP2]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
@@ -42,23 +40,21 @@ define fp128 @test_atomicrmw_fadd_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fsub_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub fp128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast fp128 [[NEW]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP3]], ptr addrspace(5) [[TMP1]], i128 [[TMP4]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP8]], 0
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
+; CHECK-NEXT: store fp128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } [[TMP2]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
@@ -69,23 +65,21 @@ define fp128 @test_atomicrmw_fsub_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fmin_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fmin_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call fp128 @llvm.minnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast fp128 [[TMP3]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[TMP5]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP7:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { fp128, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP9]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = call fp128 @llvm.minnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
+; CHECK-NEXT: store fp128 [[TMP2]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { fp128, i1 } [[TMP3]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP4]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
@@ -96,23 +90,21 @@ define fp128 @test_atomicrmw_fmin_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fmax_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fmax_fp128_global_agent(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call fp128 @llvm.maxnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast fp128 [[TMP3]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[TMP5]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP7:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { fp128, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP9]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = call fp128 @llvm.maxnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
+; CHECK-NEXT: store fp128 [[TMP2]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { fp128, i1 } [[TMP3]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP4]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
index 8e6602cb1681fa..5e989c8614a0c6 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
@@ -4,8 +4,8 @@
define i128 @test_atomicrmw_xchg_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[RES:%.*]] = call i128 @__atomic_exchange_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_exchange_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw xchg ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -14,8 +14,8 @@ define i128 @test_atomicrmw_xchg_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_add_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[RES:%.*]] = call i128 @__atomic_fetch_add_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_add_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw add ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -24,8 +24,8 @@ define i128 @test_atomicrmw_add_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_sub_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_sub_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[RES:%.*]] = call i128 @__atomic_fetch_sub_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_sub_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -34,8 +34,8 @@ define i128 @test_atomicrmw_sub_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_and_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP7:%.*]] = call i128 @__atomic_fetch_and_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP7]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_and_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -44,8 +44,8 @@ define i128 @test_atomicrmw_and_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_nand_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP12:%.*]] = call i128 @__atomic_fetch_nand_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP12]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_nand_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw nand ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -54,8 +54,8 @@ define i128 @test_atomicrmw_nand_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_or_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_or_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP7:%.*]] = call i128 @__atomic_fetch_or_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP7]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_or_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -64,8 +64,8 @@ define i128 @test_atomicrmw_or_i128_global(ptr addrspace(1) %ptr, i128 %value) {
define i128 @test_atomicrmw_xor_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_xor_i128_global(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; CHECK-NEXT: [[TMP7:%.*]] = call i128 @__atomic_fetch_xor_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
-; CHECK-NEXT: ret i128 [[TMP7]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__atomic_fetch_xor_16(ptr [[TMP1]], i128 [[VALUE:%.*]], i32 5)
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -73,26 +73,25 @@ define i128 @test_atomicrmw_xor_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_max_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[RES]] = extractvalue { i128, i1 } [[TMP8]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: ret i128 [[NEWLOADED]]
;
%res = atomicrmw max ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -100,26 +99,25 @@ define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_min_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sle i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[RES]] = extractvalue { i128, i1 } [[TMP8]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sle i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: ret i128 [[NEWLOADED]]
;
%res = atomicrmw min ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -127,26 +125,25 @@ define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_umax_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[RES]] = extractvalue { i128, i1 } [[TMP8]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: ret i128 [[NEWLOADED]]
;
%res = atomicrmw umax ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -154,26 +151,25 @@ define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_umin_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
-; CHECK-NEXT: [[RES]] = extractvalue { i128, i1 } [[TMP8]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret i128 [[RES]]
+; CHECK-NEXT: ret i128 [[NEWLOADED]]
;
%res = atomicrmw umin ptr addrspace(1) %ptr, i128 %value seq_cst
ret i128 %res
@@ -181,17 +177,16 @@ define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_cmpxchg_i128_global(ptr addrspace(1) %out, i128 %in, i128 %old) {
; CHECK-LABEL: @test_cmpxchg_i128_global(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i128, ptr addrspace(1) [[OUT:%.*]], i64 4
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[GEP]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: store i128 [[OLD:%.*]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: [[TMP15:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP2]], ptr addrspace(5) [[TMP1]], i128 [[IN:%.*]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
-; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { i128, i1 } [[TMP20]], i1 [[TMP15]], 1
-; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP21]], 0
+; CHECK-NEXT: store i128 [[OLD:%.*]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: store i128 [[IN:%.*]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
; CHECK-NEXT: ret i128 [[EXTRACT]]
;
%gep = getelementptr i128, ptr addrspace(1) %out, i64 4
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
index 4556ad60ccf8ca..bf3c2bb13dc967 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
@@ -12,22 +12,17 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -43,22 +38,17 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -150,22 +140,17 @@ define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -181,22 +166,17 @@ define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -212,22 +192,17 @@ define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -243,22 +218,17 @@ define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -344,22 +314,17 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -375,22 +340,17 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -450,22 +410,17 @@ define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -481,22 +436,17 @@ define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -512,22 +462,17 @@ define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -543,22 +488,17 @@ define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -644,22 +584,17 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -675,22 +610,17 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -750,22 +680,17 @@ define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -781,22 +706,17 @@ define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -812,22 +732,17 @@ define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -843,22 +758,17 @@ define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -944,22 +854,17 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -975,22 +880,17 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1050,22 +950,17 @@ define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1081,22 +976,17 @@ define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1112,22 +1002,17 @@ define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1143,22 +1028,17 @@ define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
index b94023b97a2950..70334068e63226 100644
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -30,15 +30,19 @@ define i1 @test_cmpxchg_seq_cst(ptr %addr, i128 %desire, i128 %new) {
;
; PWR7-LABEL: @test_cmpxchg_seq_cst(
; PWR7-NEXT: entry:
-; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 8
-; PWR7-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP0]])
-; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 8
-; PWR7-NEXT: [[TMP1:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[TMP0]], i128 [[NEW:%.*]], i32 5, i32 5)
-; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8
-; PWR7-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP0]])
-; PWR7-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP2]], 0
-; PWR7-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 [[TMP1]], 1
-; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+; PWR7-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8
+; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[CMPXCHG_EXPECTED_PTR]], align 8
+; PWR7-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8
+; PWR7-NEXT: store i128 [[NEW:%.*]], ptr [[CMPXCHG_DESIRED_PTR]], align 8
+; PWR7-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8
+; PWR7-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr [[CMPXCHG_DESIRED_PTR]], align 8
+; PWR7-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_16:%.*]] = call i8 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[CMPXCHG_EXPECTED_PTR]], i128 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+; PWR7-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_16]], 0
+; PWR7-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 16, i1 false)
+; PWR7-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr [[CMPXCHG_PREV_PTR]], align 8
+; PWR7-NEXT: [[TMP0:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; PWR7-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } [[TMP0]], i1 [[CMPXCHG_SUCCESS]], 1
+; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP1]], 1
; PWR7-NEXT: ret i1 [[SUCC]]
;
entry:
diff --git a/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
index 7e41583189c3d3..86cea814de5edf 100644
--- a/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
@@ -3,22 +3,25 @@
define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP1]])
-; CHECK-NEXT: store float [[LOADED]], ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[NEW]] to i32
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[TMP1]], i32 [[TMP5]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { float, i1 } poison, float [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { float, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP9]], 0
+; CHECK-NEXT: store float [[LOADED]], ptr [[CMPXCHG_EXPECTED_PTR]], align 4
+; CHECK-NEXT: store float [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 4
+; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[CMPXCHG_DESIRED_PTR]], align 4
+; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 4, i1 false)
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load float, ptr [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { float, i1 } poison, float [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { float, i1 } [[TMP2]], i1 [[CMPXCHG_SUCCESS]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret float [[NEWLOADED]]
@@ -29,22 +32,25 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
define float @test_atomicrmw_fsub_f32(ptr %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP1]])
-; CHECK-NEXT: store float [[LOADED]], ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[NEW]] to i32
-; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[TMP1]], i32 [[TMP5]], i32 5, i32 5)
-; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP1]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { float, i1 } poison, float [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { float, i1 } [[TMP8]], i1 [[TMP6]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP9]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP9]], 0
+; CHECK-NEXT: store float [[LOADED]], ptr [[CMPXCHG_EXPECTED_PTR]], align 4
+; CHECK-NEXT: store float [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 4
+; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[CMPXCHG_DESIRED_PTR]], align 4
+; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 4, i1 false)
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load float, ptr [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { float, i1 } poison, float [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { float, i1 } [[TMP2]], i1 [[CMPXCHG_SUCCESS]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret float [[NEWLOADED]]
diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll b/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
index 682c1e6848b313..33436edb19417f 100644
--- a/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
+++ b/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=atomic-expand | FileCheck %s
;;; NOTE: this test is actually target-independent -- any target which
@@ -12,51 +13,65 @@ target triple = "sparc-unknown-unknown"
;; First, check the sized calls. Except for cmpxchg, these are fairly
;; straightforward.
-; CHECK-LABEL: @test_load_i16(
-; CHECK: %1 = call i16 @__atomic_load_2(ptr %arg, i32 5)
-; CHECK: ret i16 %1
define i16 @test_load_i16(ptr %arg) {
+; CHECK-LABEL: define i16 @test_load_i16(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @__atomic_load_2(ptr [[ARG]], i32 5)
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
%ret = load atomic i16, ptr %arg seq_cst, align 4
ret i16 %ret
}
-; CHECK-LABEL: @test_store_i16(
-; CHECK: call void @__atomic_store_2(ptr %arg, i16 %val, i32 5)
-; CHECK: ret void
define void @test_store_i16(ptr %arg, i16 %val) {
+; CHECK-LABEL: define void @test_store_i16(
+; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
+; CHECK-NEXT: call void @__atomic_store_2(ptr [[ARG]], i16 [[VAL]], i32 5)
+; CHECK-NEXT: ret void
+;
store atomic i16 %val, ptr %arg seq_cst, align 4
ret void
}
-; CHECK-LABEL: @test_exchange_i16(
-; CHECK: %1 = call i16 @__atomic_exchange_2(ptr %arg, i16 %val, i32 5)
-; CHECK: ret i16 %1
define i16 @test_exchange_i16(ptr %arg, i16 %val) {
+; CHECK-LABEL: define i16 @test_exchange_i16(
+; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @__atomic_exchange_2(ptr [[ARG]], i16 [[VAL]], i32 5)
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
%ret = atomicrmw xchg ptr %arg, i16 %val seq_cst
ret i16 %ret
}
-; CHECK-LABEL: @test_cmpxchg_i16(
-; CHECK: %1 = alloca i16, align 2
-; CHECK: call void @llvm.lifetime.start.p0(i64 2, ptr %1)
-; CHECK: store i16 %old, ptr %1, align 2
-; CHECK: %2 = call zeroext i1 @__atomic_compare_exchange_2(ptr %arg, ptr %1, i16 %new, i32 5, i32 0)
-; CHECK: %3 = load i16, ptr %1, align 2
-; CHECK: call void @llvm.lifetime.end.p0(i64 2, ptr %1)
-; CHECK: %4 = insertvalue { i16, i1 } poison, i16 %3, 0
-; CHECK: %5 = insertvalue { i16, i1 } %4, i1 %2, 1
-; CHECK: %ret = extractvalue { i16, i1 } %5, 0
-; CHECK: ret i16 %ret
define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) {
+; CHECK-LABEL: define i16 @test_cmpxchg_i16(
+; CHECK-SAME: ptr [[ARG:%.*]], i16 [[OLD:%.*]], i16 [[NEW:%.*]]) {
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT: store i16 [[OLD]], ptr [[CMPXCHG_EXPECTED_PTR]], align 2
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT: store i16 [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 2
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i16, ptr [[CMPXCHG_DESIRED_PTR]], align 2
+; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_2:%.*]] = call i8 @__atomic_compare_exchange_2(ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], i16 [[CMPXCHG_DESIRED]], i32 5, i32 0)
+; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_2]], 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 2, i1 false)
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i16, ptr [[CMPXCHG_PREV_PTR]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i16, i1 } poison, i16 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i16, i1 } [[TMP1]], i1 [[CMPXCHG_SUCCESS]], 1
+; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP2]], 0
+; CHECK-NEXT: ret i16 [[RET]]
+;
%ret_succ = cmpxchg ptr %arg, i16 %old, i16 %new seq_cst monotonic
%ret = extractvalue { i16, i1 } %ret_succ, 0
ret i16 %ret
}
-; CHECK-LABEL: @test_add_i16(
-; CHECK: %1 = call i16 @__atomic_fetch_add_2(ptr %arg, i16 %val, i32 5)
-; CHECK: ret i16 %1
define i16 @test_add_i16(ptr %arg, i16 %val) {
+; CHECK-LABEL: define i16 @test_add_i16(
+; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @__atomic_fetch_add_2(ptr [[ARG]], i16 [[VAL]], i32 5)
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
%ret = atomicrmw add ptr %arg, i16 %val seq_cst
ret i16 %ret
}
@@ -66,62 +81,69 @@ define i16 @test_add_i16(ptr %arg, i16 %val) {
;; these tests because the "16" suffixed functions aren't available on
;; 32-bit i386.
-; CHECK-LABEL: @test_load_i128(
-; CHECK: %1 = alloca i128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
-; CHECK: call void @__atomic_load(i32 16, ptr %arg, ptr %1, i32 5)
-; CHECK: %2 = load i128, ptr %1, align 8
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
-; CHECK: ret i128 %2
define i128 @test_load_i128(ptr %arg) {
+; CHECK-LABEL: define i128 @test_load_i128(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: call void @__atomic_load(i32 16, ptr [[ARG]], ptr [[TMP1]], i32 5)
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: ret i128 [[TMP2]]
+;
%ret = load atomic i128, ptr %arg seq_cst, align 16
ret i128 %ret
}
-; CHECK-LABEL: @test_store_i128(
-; CHECK: %1 = alloca i128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
-; CHECK: store i128 %val, ptr %1, align 8
-; CHECK: call void @__atomic_store(i32 16, ptr %arg, ptr %1, i32 5)
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
-; CHECK: ret void
define void @test_store_i128(ptr %arg, i128 %val) {
+; CHECK-LABEL: define void @test_store_i128(
+; CHECK-SAME: ptr [[ARG:%.*]], i128 [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: store i128 [[VAL]], ptr [[TMP1]], align 8
+; CHECK-NEXT: call void @__atomic_store(i32 16, ptr [[ARG]], ptr [[TMP1]], i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: ret void
+;
store atomic i128 %val, ptr %arg seq_cst, align 16
ret void
}
-; CHECK-LABEL: @test_exchange_i128(
-; CHECK: %1 = alloca i128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
-; CHECK: store i128 %val, ptr %1, align 8
-; CHECK: %2 = alloca i128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %2)
-; CHECK: call void @__atomic_exchange(i32 16, ptr %arg, ptr %1, ptr %2, i32 5)
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
-; CHECK: %3 = load i128, ptr %2, align 8
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %2)
-; CHECK: ret i128 %3
define i128 @test_exchange_i128(ptr %arg, i128 %val) {
+; CHECK-LABEL: define i128 @test_exchange_i128(
+; CHECK-SAME: ptr [[ARG:%.*]], i128 [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: store i128 [[VAL]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = alloca i128, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP2]])
+; CHECK-NEXT: call void @__atomic_exchange(i32 16, ptr [[ARG]], ptr [[TMP1]], ptr [[TMP2]], i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP2]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP2]])
+; CHECK-NEXT: ret i128 [[TMP3]]
+;
%ret = atomicrmw xchg ptr %arg, i128 %val seq_cst
ret i128 %ret
}
-; CHECK-LABEL: @test_cmpxchg_i128(
-; CHECK: %1 = alloca i128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
-; CHECK: store i128 %old, ptr %1, align 8
-; CHECK: %2 = alloca i128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %2)
-; CHECK: store i128 %new, ptr %2, align 8
-; CHECK: %3 = call zeroext i1 @__atomic_compare_exchange(i32 16, ptr %arg, ptr %1, ptr %2, i32 5, i32 0)
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %2)
-; CHECK: %4 = load i128, ptr %1, align 8
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
-; CHECK: %5 = insertvalue { i128, i1 } poison, i128 %4, 0
-; CHECK: %6 = insertvalue { i128, i1 } %5, i1 %3, 1
-; CHECK: %ret = extractvalue { i128, i1 } %6, 0
-; CHECK: ret i128 %ret
define i128 @test_cmpxchg_i128(ptr %arg, i128 %old, i128 %new) {
+; CHECK-LABEL: define i128 @test_cmpxchg_i128(
+; CHECK-SAME: ptr [[ARG:%.*]], i128 [[OLD:%.*]], i128 [[NEW:%.*]]) {
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8
+; CHECK-NEXT: store i128 [[OLD]], ptr [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8
+; CHECK-NEXT: store i128 [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8
+; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 16, ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], ptr [[CMPXCHG_DESIRED_PTR]], i32 5, i32 0)
+; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 16, i1 false)
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } [[TMP1]], i1 [[CMPXCHG_SUCCESS]], 1
+; CHECK-NEXT: [[RET:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+; CHECK-NEXT: ret i128 [[RET]]
+;
%ret_succ = cmpxchg ptr %arg, i128 %old, i128 %new seq_cst monotonic
%ret = extractvalue { i128, i1 } %ret_succ, 0
ret i128 %ret
@@ -131,68 +153,77 @@ define i128 @test_cmpxchg_i128(ptr %arg, i128 %old, i128 %new) {
; __atomic_fetch_add function, so it needs to expand to a cmpxchg
; loop, which then itself expands into a libcall.
-; CHECK-LABEL: @test_add_i128(
-; CHECK: %1 = alloca i128, align 8
-; CHECK: %2 = alloca i128, align 8
-; CHECK: %3 = load i128, ptr %arg, align 16
-; CHECK: br label %atomicrmw.start
-; CHECK:atomicrmw.start:
-; CHECK: %loaded = phi i128 [ %3, %0 ], [ %newloaded, %atomicrmw.start ]
-; CHECK: %new = add i128 %loaded, %val
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
-; CHECK: store i128 %loaded, ptr %1, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %2)
-; CHECK: store i128 %new, ptr %2, align 8
-; CHECK: %4 = call zeroext i1 @__atomic_compare_exchange(i32 16, ptr %arg, ptr %1, ptr %2, i32 5, i32 5)
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %2)
-; CHECK: %5 = load i128, ptr %1, align 8
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
-; CHECK: %6 = insertvalue { i128, i1 } poison, i128 %5, 0
-; CHECK: %7 = insertvalue { i128, i1 } %6, i1 %4, 1
-; CHECK: %success = extractvalue { i128, i1 } %7, 1
-; CHECK: %newloaded = extractvalue { i128, i1 } %7, 0
-; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start
-; CHECK:atomicrmw.end:
-; CHECK: ret i128 %newloaded
define i128 @test_add_i128(ptr %arg, i128 %val) {
+; CHECK-LABEL: define i128 @test_add_i128(
+; CHECK-SAME: ptr [[ARG:%.*]], i128 [[VAL:%.*]]) {
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[ARG]], align 16
+; CHECK-NEXT: br label %[[ATOMICRMW_START:.*]]
+; CHECK: [[ATOMICRMW_START]]:
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], %[[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[NEW:%.*]] = add i128 [[LOADED]], [[VAL]]
+; CHECK-NEXT: store i128 [[LOADED]], ptr [[CMPXCHG_EXPECTED_PTR]], align 8
+; CHECK-NEXT: store i128 [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 8
+; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 16, ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], ptr [[CMPXCHG_DESIRED_PTR]], i32 5, i32 5)
+; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 16, i1 false)
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } [[TMP2]], i1 [[CMPXCHG_SUCCESS]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP3]], 0
+; CHECK-NEXT: br i1 [[SUCCESS]], label %[[ATOMICRMW_END:.*]], label %[[ATOMICRMW_START]]
+; CHECK: [[ATOMICRMW_END]]:
+; CHECK-NEXT: ret i128 [[NEWLOADED]]
+;
%ret = atomicrmw add ptr %arg, i128 %val seq_cst
ret i128 %ret
}
;; Ensure that non-integer types get bitcast correctly on the way in and out of a libcall:
-; CHECK-LABEL: @test_load_double(
-; CHECK: %1 = call i64 @__atomic_load_8(ptr %arg, i32 5)
-; CHECK: %2 = bitcast i64 %1 to double
-; CHECK: ret double %2
define double @test_load_double(ptr %arg, double %val) {
+; CHECK-LABEL: define double @test_load_double(
+; CHECK-SAME: ptr [[ARG:%.*]], double [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @__atomic_load_8(ptr [[ARG]], i32 5)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
+; CHECK-NEXT: ret double [[TMP2]]
+;
%1 = load atomic double, ptr %arg seq_cst, align 16
ret double %1
}
-; CHECK-LABEL: @test_store_double(
-; CHECK: %1 = bitcast double %val to i64
-; CHECK: call void @__atomic_store_8(ptr %arg, i64 %1, i32 5)
-; CHECK: ret void
define void @test_store_double(ptr %arg, double %val) {
+; CHECK-LABEL: define void @test_store_double(
+; CHECK-SAME: ptr [[ARG:%.*]], double [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64
+; CHECK-NEXT: call void @__atomic_store_8(ptr [[ARG]], i64 [[TMP1]], i32 5)
+; CHECK-NEXT: ret void
+;
store atomic double %val, ptr %arg seq_cst, align 16
ret void
}
-; CHECK-LABEL: @test_cmpxchg_ptr(
-; CHECK: %1 = alloca ptr, align 4
-; CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %1)
-; CHECK: store ptr %old, ptr %1, align 4
-; CHECK: %2 = ptrtoint ptr %new to i32
-; CHECK: %3 = call zeroext i1 @__atomic_compare_exchange_4(ptr %arg, ptr %1, i32 %2, i32 5, i32 2)
-; CHECK: %4 = load ptr, ptr %1, align 4
-; CHECK: call void @llvm.lifetime.end.p0(i64 4, ptr %1)
-; CHECK: %5 = insertvalue { ptr, i1 } poison, ptr %4, 0
-; CHECK: %6 = insertvalue { ptr, i1 } %5, i1 %3, 1
-; CHECK: %ret = extractvalue { ptr, i1 } %6, 0
-; CHECK: ret ptr %ret
-; CHECK: }
define ptr @test_cmpxchg_ptr(ptr %arg, ptr %old, ptr %new) {
+; CHECK-LABEL: define ptr @test_cmpxchg_ptr(
+; CHECK-SAME: ptr [[ARG:%.*]], ptr [[OLD:%.*]], ptr [[NEW:%.*]]) {
+; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: store ptr [[OLD]], ptr [[CMPXCHG_EXPECTED_PTR]], align 4
+; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: store ptr [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 4
+; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[CMPXCHG_DESIRED_PTR]], align 4
+; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 2)
+; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 4, i1 false)
+; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load ptr, ptr [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr, i1 } poison, ptr [[CMPXCHG_PREV_LOAD]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i1 } [[TMP1]], i1 [[CMPXCHG_SUCCESS]], 1
+; CHECK-NEXT: [[RET:%.*]] = extractvalue { ptr, i1 } [[TMP2]], 0
+; CHECK-NEXT: ret ptr [[RET]]
+;
%ret_succ = cmpxchg ptr %arg, ptr %old, ptr %new seq_cst acquire
%ret = extractvalue { ptr, i1 } %ret_succ, 0
ret ptr %ret
@@ -200,14 +231,16 @@ define ptr @test_cmpxchg_ptr(ptr %arg, ptr %old, ptr %new) {
;; ...and for a non-integer type of large size too.
-; CHECK-LABEL: @test_store_fp128
-; CHECK: %1 = alloca fp128, align 8
-; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
-; CHECK: store fp128 %val, ptr %1, align 8
-; CHECK: call void @__atomic_store(i32 16, ptr %arg, ptr %1, i32 5)
-; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
-; CHECK: ret void
define void @test_store_fp128(ptr %arg, fp128 %val) {
+; CHECK-LABEL: define void @test_store_fp128(
+; CHECK-SAME: ptr [[ARG:%.*]], fp128 [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: store fp128 [[VAL]], ptr [[TMP1]], align 8
+; CHECK-NEXT: call void @__atomic_store(i32 16, ptr [[ARG]], ptr [[TMP1]], i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT: ret void
+;
store atomic fp128 %val, ptr %arg seq_cst, align 16
ret void
}
@@ -217,16 +250,30 @@ define void @test_store_fp128(ptr %arg, fp128 %val) {
;; NOTE: atomicrmw and cmpxchg don't yet support an align attribute;
;; when such support is added, they should also be tested here.
-; CHECK-LABEL: @test_unaligned_load_i16(
-; CHECK: __atomic_load(
define i16 @test_unaligned_load_i16(ptr %arg) {
+; CHECK-LABEL: define i16 @test_unaligned_load_i16(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i16, align 2
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[TMP1]])
+; CHECK-NEXT: call void @__atomic_load(i32 2, ptr [[ARG]], ptr [[TMP1]], i32 5)
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[TMP1]])
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
%ret = load atomic i16, ptr %arg seq_cst, align 1
ret i16 %ret
}
-; CHECK-LABEL: @test_unaligned_store_i16(
-; CHECK: __atomic_store(
define void @test_unaligned_store_i16(ptr %arg, i16 %val) {
+; CHECK-LABEL: define void @test_unaligned_store_i16(
+; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i16, align 2
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[TMP1]])
+; CHECK-NEXT: store i16 [[VAL]], ptr [[TMP1]], align 2
+; CHECK-NEXT: call void @__atomic_store(i32 2, ptr [[ARG]], ptr [[TMP1]], i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[TMP1]])
+; CHECK-NEXT: ret void
+;
store atomic i16 %val, ptr %arg seq_cst, align 1
ret void
}
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 81f2c9c55b54dd..129138d3969f6b 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,21 +34,21 @@
#
# CHECK: << Total TLI yes SDK no: 8
# CHECK: >> Total TLI no SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 248
+# CHECK: == Total TLI yes SDK yes: 254
#
# WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
# WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : {{.*}}__hot_cold_t
# WRONG_SUMMARY: << Total TLI yes SDK no: 9{{$}}
# WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 247
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 253
#
## The -COUNT suffix doesn't care if there are too many matches, so check
## the exact count first; the two directives should add up to that.
## Yes, this means additions to TLI will fail this test, but the argument
## to -COUNT can't be an expression.
-# AVAIL: TLI knows 489 symbols, 256 available
-# AVAIL-COUNT-256: {{^}} available
+# AVAIL: TLI knows 495 symbols, 262 available
+# AVAIL-COUNT-262: {{^}} available
# AVAIL-NOT: {{^}} available
# UNAVAIL-COUNT-233: not available
# UNAVAIL-NOT: not available
@@ -171,6 +171,30 @@ DynamicSymbols:
Type: STT_FUNC
Section: .text
Binding: STB_GLOBAL
+ - Name: __atomic_compare_exchange
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: __atomic_compare_exchange_1
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: __atomic_compare_exchange_2
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: __atomic_compare_exchange_4
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: __atomic_compare_exchange_8
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: __atomic_compare_exchange_16
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
- Name: abs
Type: STT_FUNC
Section: .text
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index d344ebe676799c..61798fc7ba269d 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -535,6 +535,13 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
"declare void @__atomic_load(i64, i8*, i8*, i32)\n"
"declare void @__atomic_store(i64, i8*, i8*, i32)\n"
+ "declare i8 @__atomic_compare_exchange(i64, ptr, ptr, ptr, i32, i32)\n"
+ "declare i8 @__atomic_compare_exchange_1(ptr, ptr, i8, i32, i32)\n"
+ "declare i8 @__atomic_compare_exchange_2(ptr, ptr, i16, i32, i32)\n"
+ "declare i8 @__atomic_compare_exchange_4(ptr, ptr, i32, i32, i32)\n"
+ "declare i8 @__atomic_compare_exchange_8(ptr, ptr, i64, i32, i32)\n"
+ "declare i8 @__atomic_compare_exchange_16(ptr, ptr, i128, i32, i32)\n"
+
// These are similar to the FILE* fgetc/fputc.
"declare i32 @_IO_getc(%struct*)\n"
"declare i32 @_IO_putc(i32, %struct*)\n"
>From ee84c6bb3146d25f21c14d6d8e0abb794683f5ec Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Mon, 5 Aug 2024 19:05:33 +0200
Subject: [PATCH 02/17] Rebase fixes
---
clang/lib/CodeGen/CGAtomic.cpp | 6 +-
clang/lib/CodeGen/CodeGenFunction.cpp | 10 ++
clang/lib/CodeGen/CodeGenFunction.h | 5 +
clang/lib/CodeGen/CodeGenModule.cpp | 5 +-
clang/lib/CodeGen/CodeGenModule.h | 7 +-
clang/lib/CodeGen/ModuleBuilder.cpp | 5 +-
.../PowerPC/builtins-ppc-xlcompat-cas.c | 4 +-
clang/test/CodeGen/PowerPC/quadword-atomics.c | 29 +---
.../SystemZ/gnu-atomic-builtins-i128-16Al.c | 4 +-
.../SystemZ/gnu-atomic-builtins-i128-8Al.c | 4 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i16.c | 4 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i32.c | 4 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i64.c | 4 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i8.c | 4 +-
.../test/CodeGen/X86/x86-atomic-long_double.c | 6 -
clang/test/CodeGen/atomic_ops.c | 76 ++++-----
clang/test/CodeGen/c11atomics.c | 73 ++++-----
clang/test/CodeGen/stack-arg-probe.c | 2 -
clang/test/CodeGenCUDA/atomic-ops.cu | 40 ++---
clang/test/CodeGenCXX/atomic-inline.cpp | 154 +++++++++++++++---
clang/test/CodeGenOpenCL/atomic-ops.cl | 36 ++--
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 64 ++++----
.../AtomicExpand/AMDGPU/expand-atomic-i128.ll | 17 +-
23 files changed, 315 insertions(+), 248 deletions(-)
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 40c44df58ba5ef..9fb6ecee50a6d8 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -442,7 +442,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
LLVMPtr, Expected, Desired, IsWeakOp, E->isVolatile(), Order,
FailureOrder, Scope, Expected, Ptr.getElementType(), {}, {}, Align,
CGF.Builder, CGF.CGM.getDataLayout(), CGF.getTargetLibraryInfo(),
- CGF.CGM.getTargetLowering(), SupportedScopes, DefaultScope);
+ CGF.getTargetLowering(), SupportedScopes, DefaultScope);
return;
}
@@ -468,7 +468,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
LLVMPtr, Expected, Desired, IsWeak, E->isVolatile(), Order,
FailureOrder, Scope, Expected, Ptr.getElementType(), {}, {}, Align,
CGF.Builder, CGF.CGM.getDataLayout(), CGF.getTargetLibraryInfo(),
- CGF.CGM.getTargetLowering(), SupportedScopes, DefaultScope);
+ CGF.getTargetLowering(), SupportedScopes, DefaultScope);
CGF.EmitStoreOfScalar(SuccessVal, CGF.MakeAddrLValue(Dest, E->getType()));
return;
}
@@ -1583,7 +1583,7 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
Failure, PrevPtr, getAtomicAddress().getElementType(),
getValueSizeInBytes(), getAtomicSizeInBytes(),
getAtomicAlignment().getAsAlign(), CGF.Builder, CGF.CGM.getDataLayout(),
- CGF.getTargetLibraryInfo(), CGF.CGM.getTargetLowering());
+ CGF.getTargetLibraryInfo(), CGF.getTargetLowering());
return std::make_pair(
convertAtomicTempToRValue(PrevAddr, AggValueSlot::ignored(),
SourceLocation(), /*AsValue=*/false),
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index d02eac9abb2d22..c99f068cbb1246 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -37,6 +37,7 @@
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -47,6 +48,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/xxhash.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>
@@ -107,6 +109,12 @@ CodeGenFunction::~CodeGenFunction() {
CGM.getOpenMPRuntime().getOMPBuilder().finalize(CurFn);
}
+const llvm::TargetLowering *CodeGenFunction::getTargetLowering() const {
+ if (!TSI)
+ return nullptr;
+ return TSI->getTargetLowering();
+}
+
// Map the LangOption for exception behavior into
// the corresponding enum in the IR.
llvm::fp::ExceptionBehavior
@@ -1521,6 +1529,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
CurFn->addFnAttr(llvm::Attribute::MustProgress);
TLI.reset(new llvm::TargetLibraryInfo(CGM.getTargetLibraryInfoImpl(), Fn));
+ if (const llvm::TargetMachine *TM = CGM.getTargetMachine())
+ TSI = TM->getSubtargetImpl(*Fn);
// Generate the body of the function.
PGO.assignRegionCounters(GD, CurFn);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 894e4af50fb826..5043b501b5f618 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -53,6 +53,7 @@ class SwitchInst;
class Twine;
class Value;
class CanonicalLoopInfo;
+class TargetSubtargetInfo;
}
namespace clang {
@@ -286,6 +287,8 @@ class CodeGenFunction : public CodeGenTypeCache {
CodeGenModule &CGM; // Per-module state.
const TargetInfo &Target;
std::unique_ptr<llvm::TargetLibraryInfo> TLI;
+ // std::unique_ptr<llvm::MCSubtarget> STM;
+ const llvm::TargetSubtargetInfo *TSI = nullptr;
// For EH/SEH outlined funclets, this field points to parent's CGF
CodeGenFunction *ParentCGF = nullptr;
@@ -2155,6 +2158,8 @@ class CodeGenFunction : public CodeGenTypeCache {
return TLI.get();
}
+ const llvm::TargetLowering *getTargetLowering() const;
+
/// Returns a pointer to the function's exception object and selector slot,
/// which is assigned in every landing pad.
Address getExceptionSlot();
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 0146e281b67b00..effc5ed6ebee14 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -71,6 +71,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/RISCVISAInfo.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
@@ -337,13 +338,13 @@ CodeGenModule::CodeGenModule(
ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO,
const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags,
- llvm::TargetLibraryInfoImpl &TLII, const llvm::TargetLowering *TL,
+ llvm::TargetLibraryInfoImpl &TLII, const llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo)
: Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
VMContext(M.getContext()), Types(*this), VTables(*this),
- SanitizerMD(new SanitizerMetadata(*this)), TLII(TLII), TL(TL) {
+ SanitizerMD(new SanitizerMetadata(*this)), TLII(TLII), TM(TM) {
// Initialize the type cache.
llvm::LLVMContext &LLVMContext = M.getContext();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index d3ea293d49fa51..e11e653cf3f676 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -50,6 +50,7 @@ class FunctionType;
class LLVMContext;
class IndexedInstrProfReader;
class TargetLowering;
+class TargetMachine;
namespace vfs {
class FileSystem;
@@ -309,7 +310,7 @@ class CodeGenModule : public CodeGenTypeCache {
unsigned NumAutoVarInit = 0;
llvm::Module &TheModule;
llvm::TargetLibraryInfoImpl &TLII;
- const llvm::TargetLowering *TL;
+ const llvm::TargetMachine *TM;
DiagnosticsEngine &Diags;
const TargetInfo &Target;
@@ -637,7 +638,7 @@ class CodeGenModule : public CodeGenTypeCache {
const PreprocessorOptions &ppopts,
const CodeGenOptions &CodeGenOpts, llvm::Module &M,
DiagnosticsEngine &Diags, llvm::TargetLibraryInfoImpl &TLII,
- const llvm::TargetLowering *TL,
+ const llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo = nullptr);
~CodeGenModule();
@@ -777,7 +778,7 @@ class CodeGenModule : public CodeGenTypeCache {
return TheModule.getDataLayout();
}
llvm::TargetLibraryInfoImpl &getTargetLibraryInfoImpl() const { return TLII; }
- const llvm::TargetLowering *getTargetLowering() const { return TL; }
+ const llvm::TargetMachine *getTargetMachine() const { return TM; }
const TargetInfo &getTarget() const { return Target; }
const llvm::Triple &getTriple() const { return Target.getTriple(); }
bool supportsCOMDAT() const;
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index 06eff706301b47..a698f19f88e548 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -67,7 +67,6 @@ namespace {
std::unique_ptr<llvm::Module> M;
std::unique_ptr<CodeGen::CodeGenModule> Builder;
std::unique_ptr<llvm::TargetLibraryInfoImpl> TLII;
- std::unique_ptr<llvm::TargetLowering> TL;
const llvm::TargetMachine *TM;
private:
@@ -171,11 +170,9 @@ namespace {
TLII.reset(
llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
- if (TM)
- TL = std::make_unique<llvm::TargetLowering>(*TM);
Builder.reset(new CodeGen::CodeGenModule(
Context, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M,
- Diags, *TLII.get(), TL.get(), CoverageInfo));
+ Diags, *TLII.get(), TM, CoverageInfo));
for (auto &&Lib : CodeGenOpts.DependentLibraries)
Builder->AddDependentLib(Lib);
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
index dbd0e86ffbe18c..1d7db42f6c1f3e 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
@@ -25,8 +25,8 @@
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[ATOMIC_TEMP2]], align 4
// CHECK-NEXT: store i32 [[TMP2]], ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i32
@@ -56,8 +56,8 @@ int test_builtin_ppc_compare_and_swap(int a, int b, int c) {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[ATOMIC_TEMP2]], align 8
// CHECK-NEXT: store i64 [[TMP2]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i32
diff --git a/clang/test/CodeGen/PowerPC/quadword-atomics.c b/clang/test/CodeGen/PowerPC/quadword-atomics.c
index dc04423060a03b..161ca891d32483 100644
--- a/clang/test/CodeGen/PowerPC/quadword-atomics.c
+++ b/clang/test/CodeGen/PowerPC/quadword-atomics.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS
@@ -23,62 +24,42 @@ typedef _Atomic(Q) AtomicQ;
typedef __int128_t int128_t;
-// PPC64-LABEL: @test_load(
-// PPC64: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
-//
Q test_load(AtomicQ *ptr) {
// expected-no-diagnostics
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
}
-// PPC64-LABEL: @test_store(
-// PPC64: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
-//
void test_store(Q val, AtomicQ *ptr) {
// expected-no-diagnostics
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
}
-// PPC64-LABEL: @test_add(
-// PPC64: [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
-//
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
// expected-no-diagnostics
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
}
-// PPC64-LABEL: @test_xchg(
-// PPC64: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
-//
Q test_xchg(AtomicQ *ptr, Q new) {
// expected-no-diagnostics
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
}
-// PPC64-LABEL: @test_cmpxchg(
-// PPC64: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
-//
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
-// PPC64-LABEL: @test_cmpxchg_weak(
-// PPC64: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
-//
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
-// PPC64-QUADWORD-ATOMICS-LABEL: @is_lock_free(
-// PPC64-QUADWORD-ATOMICS: ret i32 1
-//
-// PPC64-NO-QUADWORD-ATOMICS-LABEL: @is_lock_free(
-// PPC64-NO-QUADWORD-ATOMICS: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
-//
int is_lock_free() {
AtomicQ q;
// expected-no-diagnostics
return __c11_atomic_is_lock_free(sizeof(q));
}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// PPC64: {{.*}}
+// PPC64-NO-QUADWORD-ATOMICS: {{.*}}
+// PPC64-QUADWORD-ATOMICS: {{.*}}
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
index 039e0199d13fa9..b74994ffd46084 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
@@ -85,8 +85,8 @@ __int128 f6() {
// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 16
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[TMP0]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 16
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7() {
@@ -100,8 +100,8 @@ _Bool f7() {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr @Des, align 16
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 16
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8() {
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
index 39fdd60c8c6315..1cedcd5edb8a0b 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
@@ -90,8 +90,8 @@ __int128 f6() {
// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[TMP0]] seq_cst seq_cst, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7() {
@@ -105,8 +105,8 @@ _Bool f7() {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr @Des, align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8() {
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
index 4320c2736350c9..2ea902dde70fc9 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
@@ -71,8 +71,8 @@ int16_t f6(int16_t *Ptr, int16_t *Val, int16_t *Ret) {
// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i16, ptr [[EXP:%.*]], align 2
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[CMPXCHG_EXPECTED]], i16 [[DES:%.*]] seq_cst seq_cst, align 2
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i16 [[CMPXCHG_PREV]], ptr [[EXP]], align 2
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int16_t *Ptr, int16_t *Exp, int16_t Des) {
@@ -86,8 +86,8 @@ _Bool f7(int16_t *Ptr, int16_t *Exp, int16_t Des) {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i16, ptr [[DES:%.*]], align 2
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[CMPXCHG_EXPECTED]], i16 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 2
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i16 [[CMPXCHG_PREV]], ptr [[EXP]], align 2
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int16_t *Ptr, int16_t *Exp, int16_t *Des) {
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
index 8691228924f186..08bb0479a0da9a 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
@@ -71,8 +71,8 @@ int32_t f6(int32_t *Ptr, int32_t *Val, int32_t *Ret) {
// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[EXP:%.*]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[CMPXCHG_EXPECTED]], i32 [[DES:%.*]] seq_cst seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[EXP]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int32_t *Ptr, int32_t *Exp, int32_t Des) {
@@ -86,8 +86,8 @@ _Bool f7(int32_t *Ptr, int32_t *Exp, int32_t Des) {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DES:%.*]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[EXP]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int32_t *Ptr, int32_t *Exp, int32_t *Des) {
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
index 6d5a503df35d38..743637dd2b7906 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
@@ -71,8 +71,8 @@ int64_t f6(int64_t *Ptr, int64_t *Val, int64_t *Ret) {
// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[EXP:%.*]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[CMPXCHG_EXPECTED]], i64 [[DES:%.*]] seq_cst seq_cst, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[EXP]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int64_t *Ptr, int64_t *Exp, int64_t Des) {
@@ -86,8 +86,8 @@ _Bool f7(int64_t *Ptr, int64_t *Exp, int64_t Des) {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DES:%.*]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[EXP]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int64_t *Ptr, int64_t *Exp, int64_t *Des) {
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
index 9bbc61a7800068..2114055c1cea7a 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
@@ -71,8 +71,8 @@ int8_t f6(int8_t *Ptr, int8_t *Val, int8_t *Ret) {
// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[EXP:%.*]], align 1
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[CMPXCHG_EXPECTED]], i8 [[DES:%.*]] seq_cst seq_cst, align 1
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[EXP]], align 1
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f7(int8_t *Ptr, int8_t *Exp, int8_t Des) {
@@ -86,8 +86,8 @@ _Bool f7(int8_t *Ptr, int8_t *Exp, int8_t Des) {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[DES:%.*]], align 1
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[EXP]], align 1
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
//
_Bool f8(int8_t *Ptr, int8_t *Exp, int8_t *Des) {
diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index c514ff76efbfa9..07237206eb2520 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -14,7 +14,6 @@
// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
-//
// X86-LABEL: define dso_local x86_fp80 @testinc(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -44,7 +43,6 @@ long double testinc(_Atomic long double *addr) {
// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP2]]
-//
// X86-LABEL: define dso_local x86_fp80 @testdec(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -98,7 +96,6 @@ long double testdec(_Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: ret x86_fp80 [[TMP10]]
-//
// X86-LABEL: define dso_local x86_fp80 @testcompassign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*]]:
@@ -153,7 +150,6 @@ long double testcompassign(_Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
-//
// X86-LABEL: define dso_local x86_fp80 @testassign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -188,7 +184,6 @@ long double testassign(_Atomic long double *addr) {
// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
-//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_inc(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -217,7 +212,6 @@ long double test_volatile_inc(volatile _Atomic long double *addr) {
// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP2]]
-//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_dec(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/atomic_ops.c b/clang/test/CodeGen/atomic_ops.c
index 03d1f7909f57e3..7f41fecdb6617d 100644
--- a/clang/test/CodeGen/atomic_ops.c
+++ b/clang/test/CodeGen/atomic_ops.c
@@ -33,8 +33,8 @@
// NATIVE-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
// NATIVE-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
// NATIVE-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// NATIVE-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// NATIVE-NEXT: [[TMP1]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// NATIVE: [[ATOMIC_CONT]]:
@@ -49,8 +49,8 @@
// NATIVE-NEXT: [[CMPXCHG_DESIRED10:%.*]] = load i32, ptr [[ATOMIC_TEMP7]], align 4
// NATIVE-NEXT: [[CMPXCHG_PAIR11:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED9]], i32 [[CMPXCHG_DESIRED10]] seq_cst seq_cst, align 4
// NATIVE-NEXT: [[CMPXCHG_PREV12:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 0
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS13:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 1
// NATIVE-NEXT: store i32 [[CMPXCHG_PREV12]], ptr [[ATOMIC_TEMP8]], align 4
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS13:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 1
// NATIVE-NEXT: [[TMP3]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS13]], label %[[ATOMIC_CONT5:.*]], label %[[ATOMIC_OP3]]
// NATIVE: [[ATOMIC_CONT5]]:
@@ -68,8 +68,8 @@
// NATIVE-NEXT: [[CMPXCHG_DESIRED23:%.*]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
// NATIVE-NEXT: [[CMPXCHG_PAIR24:%.*]] = cmpxchg ptr [[J]], i16 [[CMPXCHG_EXPECTED22]], i16 [[CMPXCHG_DESIRED23]] seq_cst seq_cst, align 2
// NATIVE-NEXT: [[CMPXCHG_PREV25:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 0
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS26:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 1
// NATIVE-NEXT: store i16 [[CMPXCHG_PREV25]], ptr [[ATOMIC_TEMP21]], align 2
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS26:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 1
// NATIVE-NEXT: [[TMP6]] = load i16, ptr [[ATOMIC_TEMP21]], align 2
// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS26]], label %[[ATOMIC_CONT18:.*]], label %[[ATOMIC_OP14]]
// NATIVE: [[ATOMIC_CONT18]]:
@@ -89,10 +89,10 @@
// LIBCALL-NEXT: [[ATOMIC_TEMP7:%.*]] = alloca i32, align 4
// LIBCALL-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4
// LIBCALL-NEXT: [[ATOMIC_TEMP9:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i16, align 2
+// LIBCALL-NEXT: [[ATOMIC_TEMP19:%.*]] = alloca i16, align 2
// LIBCALL-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: [[ATOMIC_TEMP21:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i16, align 2
// LIBCALL-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
// LIBCALL-NEXT: store i32 0, ptr [[I]], align 4
// LIBCALL-NEXT: store i16 0, ptr [[J]], align 2
@@ -104,12 +104,10 @@
// LIBCALL-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 2
// LIBCALL-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
// LIBCALL-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
// LIBCALL-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
-// LIBCALL-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// LIBCALL-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP3]], align 4
+// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[I]], ptr [[ATOMIC_TEMP1]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
+// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 4, i1 false)
// LIBCALL-NEXT: [[TMP2]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// LIBCALL: [[ATOMIC_CONT]]:
@@ -121,35 +119,31 @@
// LIBCALL-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2
// LIBCALL-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP7]], align 4
// LIBCALL-NEXT: store i32 [[DIV]], ptr [[ATOMIC_TEMP8]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_EXPECTED10:%.*]] = load i32, ptr [[ATOMIC_TEMP7]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_DESIRED11:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_PAIR12:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED10]], i32 [[CMPXCHG_DESIRED11]] seq_cst seq_cst, align 4
-// LIBCALL-NEXT: [[CMPXCHG_PREV13:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR12]], 0
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS14:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR12]], 1
-// LIBCALL-NEXT: store i32 [[CMPXCHG_PREV13]], ptr [[ATOMIC_TEMP9]], align 4
+// LIBCALL-NEXT: [[CMPXCHG_DESIRED10:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
+// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_411:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[I]], ptr [[ATOMIC_TEMP7]], i32 [[CMPXCHG_DESIRED10]], i32 5, i32 5)
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS12:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_411]], 0
+// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP9]], ptr [[ATOMIC_TEMP7]], i64 4, i1 false)
// LIBCALL-NEXT: [[TMP5]] = load i32, ptr [[ATOMIC_TEMP9]], align 4
-// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS14]], label %[[ATOMIC_CONT6:.*]], label %[[ATOMIC_OP4]]
+// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS12]], label %[[ATOMIC_CONT6:.*]], label %[[ATOMIC_OP4]]
// LIBCALL: [[ATOMIC_CONT6]]:
// LIBCALL-NEXT: [[TMP6:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 2, ptr noundef [[J]], ptr noundef [[ATOMIC_TEMP16]], i32 noundef 5)
-// LIBCALL-NEXT: [[TMP7:%.*]] = load i16, ptr [[ATOMIC_TEMP16]], align 2
-// LIBCALL-NEXT: br label %[[ATOMIC_OP15:.*]]
-// LIBCALL: [[ATOMIC_OP15]]:
-// LIBCALL-NEXT: [[TMP8:%.*]] = phi i16 [ [[TMP7]], %[[ATOMIC_CONT6]] ], [ [[TMP9:%.*]], %[[ATOMIC_OP15]] ]
+// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 2, ptr noundef [[J]], ptr noundef [[ATOMIC_TEMP14]], i32 noundef 5)
+// LIBCALL-NEXT: [[TMP7:%.*]] = load i16, ptr [[ATOMIC_TEMP14]], align 2
+// LIBCALL-NEXT: br label %[[ATOMIC_OP13:.*]]
+// LIBCALL: [[ATOMIC_OP13]]:
+// LIBCALL-NEXT: [[TMP8:%.*]] = phi i16 [ [[TMP7]], %[[ATOMIC_CONT6]] ], [ [[TMP9:%.*]], %[[ATOMIC_OP13]] ]
// LIBCALL-NEXT: [[CONV:%.*]] = zext i16 [[TMP8]] to i32
-// LIBCALL-NEXT: [[DIV17:%.*]] = sdiv i32 [[CONV]], [[TMP6]]
-// LIBCALL-NEXT: [[CONV18:%.*]] = trunc i32 [[DIV17]] to i16
-// LIBCALL-NEXT: store i16 [[TMP8]], ptr [[ATOMIC_TEMP20]], align 2
-// LIBCALL-NEXT: store i16 [[CONV18]], ptr [[ATOMIC_TEMP21]], align 2
-// LIBCALL-NEXT: [[CMPXCHG_EXPECTED23:%.*]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
-// LIBCALL-NEXT: [[CMPXCHG_DESIRED24:%.*]] = load i16, ptr [[ATOMIC_TEMP21]], align 2
-// LIBCALL-NEXT: [[CMPXCHG_PAIR25:%.*]] = cmpxchg ptr [[J]], i16 [[CMPXCHG_EXPECTED23]], i16 [[CMPXCHG_DESIRED24]] seq_cst seq_cst, align 2
-// LIBCALL-NEXT: [[CMPXCHG_PREV26:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR25]], 0
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS27:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR25]], 1
-// LIBCALL-NEXT: store i16 [[CMPXCHG_PREV26]], ptr [[ATOMIC_TEMP22]], align 2
-// LIBCALL-NEXT: [[TMP9]] = load i16, ptr [[ATOMIC_TEMP22]], align 2
-// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS27]], label %[[ATOMIC_CONT19:.*]], label %[[ATOMIC_OP15]]
-// LIBCALL: [[ATOMIC_CONT19]]:
+// LIBCALL-NEXT: [[DIV15:%.*]] = sdiv i32 [[CONV]], [[TMP6]]
+// LIBCALL-NEXT: [[CONV16:%.*]] = trunc i32 [[DIV15]] to i16
+// LIBCALL-NEXT: store i16 [[TMP8]], ptr [[ATOMIC_TEMP18]], align 2
+// LIBCALL-NEXT: store i16 [[CONV16]], ptr [[ATOMIC_TEMP19]], align 2
+// LIBCALL-NEXT: [[CMPXCHG_DESIRED21:%.*]] = load i16, ptr [[ATOMIC_TEMP19]], align 2
+// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_2:%.*]] = call i8 @__atomic_compare_exchange_2(ptr [[J]], ptr [[ATOMIC_TEMP18]], i16 [[CMPXCHG_DESIRED21]], i32 5, i32 5)
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS22:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_2]], 0
+// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP20]], ptr [[ATOMIC_TEMP18]], i64 2, i1 false)
+// LIBCALL-NEXT: [[TMP9]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
+// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS22]], label %[[ATOMIC_CONT17:.*]], label %[[ATOMIC_OP13]]
+// LIBCALL: [[ATOMIC_CONT17]]:
// LIBCALL-NEXT: ret void
//
void foo(int x)
@@ -330,8 +324,8 @@ _Atomic(int) compound_and(_Atomic(int) in) {
// NATIVE-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
// NATIVE-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[IN_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
// NATIVE-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// NATIVE-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
+// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// NATIVE-NEXT: [[TMP1]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// NATIVE: [[ATOMIC_CONT]]:
@@ -354,12 +348,10 @@ _Atomic(int) compound_and(_Atomic(int) in) {
// LIBCALL-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 5
// LIBCALL-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
// LIBCALL-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
// LIBCALL-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[IN_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
-// LIBCALL-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// LIBCALL-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP3]], align 4
+// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[IN_ADDR]], ptr [[ATOMIC_TEMP1]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
+// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 4, i1 false)
// LIBCALL-NEXT: [[TMP2]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// LIBCALL: [[ATOMIC_CONT]]:
diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index 3c625f226a9d00..8556e3b940661a 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -91,12 +91,10 @@ void testinc(void)
// CHECK-NEXT: [[DEC:%.*]] = add i8 [[TMP1]], -1
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 1
// CHECK-NEXT: store i8 [[DEC]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP1]], align 1
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP3]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP1]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 1, i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
// CHECK-NEXT: [[LOADEDV4:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV5]] = zext i1 [[LOADEDV4]] to i8
@@ -111,20 +109,18 @@ void testinc(void)
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV7]] to i8
// CHECK-NEXT: br label %[[ATOMIC_OP8:.*]]
// CHECK: [[ATOMIC_OP8]]:
-// CHECK-NEXT: [[TMP7:%.*]] = phi i8 [ [[STOREDV9]], %[[ATOMIC_CONT]] ], [ [[STOREDV21:%.*]], %[[ATOMIC_OP8]] ]
+// CHECK-NEXT: [[TMP7:%.*]] = phi i8 [ [[STOREDV9]], %[[ATOMIC_CONT]] ], [ [[STOREDV19:%.*]], %[[ATOMIC_OP8]] ]
// CHECK-NEXT: [[DEC10:%.*]] = add i8 [[TMP7]], -1
// CHECK-NEXT: store i8 [[TMP7]], ptr [[ATOMIC_TEMP12]], align 1
// CHECK-NEXT: store i8 [[DEC10]], ptr [[ATOMIC_TEMP13]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED15:%.*]] = load i8, ptr [[ATOMIC_TEMP12]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED16:%.*]] = load i8, ptr [[ATOMIC_TEMP13]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR17:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED15]], i8 [[CMPXCHG_DESIRED16]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV18:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR17]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS19:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR17]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV18]], ptr [[ATOMIC_TEMP14]], align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED15:%.*]] = load i8, ptr [[ATOMIC_TEMP13]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_116:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP12]], i8 [[CMPXCHG_DESIRED15]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS17:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_116]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP14]], ptr [[ATOMIC_TEMP12]], i64 1, i1 false)
// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ATOMIC_TEMP14]], align 1
-// CHECK-NEXT: [[LOADEDV20:%.*]] = trunc i8 [[TMP8]] to i1
-// CHECK-NEXT: [[STOREDV21]] = zext i1 [[LOADEDV20]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS19]], label %[[ATOMIC_CONT11:.*]], label %[[ATOMIC_OP8]]
+// CHECK-NEXT: [[LOADEDV18:%.*]] = trunc i8 [[TMP8]] to i1
+// CHECK-NEXT: [[STOREDV19]] = zext i1 [[LOADEDV18]] to i8
+// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS17]], label %[[ATOMIC_CONT11:.*]], label %[[ATOMIC_OP8]]
// CHECK: [[ATOMIC_CONT11]]:
// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw sub ptr @i, i32 1 seq_cst, align 4
// CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
@@ -164,12 +160,10 @@ void testdec(void)
// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
@@ -209,12 +203,10 @@ void testaddeq(void)
// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[SUB]] to i8
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
@@ -254,12 +246,10 @@ void testsubeq(void)
// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
@@ -299,12 +289,10 @@ void testxoreq(void)
// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[OR]] to i8
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
@@ -344,12 +332,10 @@ void testoreq(void)
// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[AND]] to i8
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @b, i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP4]], align 1
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
@@ -647,12 +633,9 @@ PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP2]], i32 6, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[TMP1]], i64 6, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP1]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP]], align 8
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_8:%.*]] = call i8 @__atomic_compare_exchange_8(ptr [[TMP0]], ptr [[ATOMIC_TEMP]], i64 [[CMPXCHG_DESIRED]], i32 5, i32 5)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_8]], 0
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
// CHECK-NEXT: ret i1 [[LOADEDV]]
diff --git a/clang/test/CodeGen/stack-arg-probe.c b/clang/test/CodeGen/stack-arg-probe.c
index 255ae482b68083..1babaa959c7100 100644
--- a/clang/test/CodeGen/stack-arg-probe.c
+++ b/clang/test/CodeGen/stack-arg-probe.c
@@ -1,10 +1,8 @@
// RUN: %clang_cc1 %s -triple=i686-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
-// RUN: %clang_cc1 %s -triple=armv7-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
// RUN: %clang_cc1 %s -triple=aarch64-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
// RUN: %clang_cc1 %s -triple=i686-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
-// RUN: %clang_cc1 %s -triple=armv7-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
// RUN: %clang_cc1 %s -triple=aarch64-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu
index 8a0b984971c455..c9c787001610c8 100644
--- a/clang/test/CodeGenCUDA/atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/atomic-ops.cu
@@ -69,8 +69,8 @@
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("singlethread-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -82,8 +82,8 @@
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("singlethread-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -305,8 +305,8 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("wavefront-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -318,8 +318,8 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("wavefront-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -539,8 +539,8 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -552,8 +552,8 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("workgroup-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -767,8 +767,8 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("agent-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -780,8 +780,8 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("agent-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -997,8 +997,8 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val,
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -1010,8 +1010,8 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val,
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -1234,8 +1234,8 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val,
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("singlethread-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -1247,8 +1247,8 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val,
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("singlethread-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -1486,8 +1486,8 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr,
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("wavefront-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -1499,8 +1499,8 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr,
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("wavefront-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -1742,8 +1742,8 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -1755,8 +1755,8 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("workgroup-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -1984,8 +1984,8 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("agent-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -1997,8 +1997,8 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("agent-one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
@@ -2228,8 +2228,8 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
@@ -2241,8 +2241,8 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("one-as") monotonic monotonic, align 8
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
diff --git a/clang/test/CodeGenCXX/atomic-inline.cpp b/clang/test/CodeGenCXX/atomic-inline.cpp
index c8fa877a37beb5..dc0c30c8f76878 100644
--- a/clang/test/CodeGenCXX/atomic-inline.cpp
+++ b/clang/test/CodeGenCXX/atomic-inline.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu -target-cpu core2 | FileCheck %s --check-prefix=CORE2
// Check the atomic code generation for cpu targets w/wo cx16 support.
@@ -6,31 +7,85 @@ struct alignas(8) AM8 {
int f1, f2;
};
AM8 m8;
+// CHECK-LABEL: define dso_local i64 @_Z5load8v(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load atomic i64, ptr @m8 monotonic, align 8
+// CHECK-NEXT: store i64 [[TMP0]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL]], align 8
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+// CORE2-LABEL: define dso_local i64 @_Z5load8v(
+// CORE2-SAME: ) #[[ATTR0:[0-9]+]] {
+// CORE2-NEXT: [[ENTRY:.*:]]
+// CORE2-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
+// CORE2-NEXT: [[TMP0:%.*]] = load atomic i64, ptr @m8 monotonic, align 8
+// CORE2-NEXT: store i64 [[TMP0]], ptr [[RETVAL]], align 8
+// CORE2-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL]], align 8
+// CORE2-NEXT: ret i64 [[TMP1]]
+//
AM8 load8() {
AM8 am;
- // CHECK-LABEL: @_Z5load8v
- // CHECK: load atomic i64, {{.*}} monotonic, align 8
- // CORE2-LABEL: @_Z5load8v
- // CORE2: load atomic i64, {{.*}} monotonic, align 8
__atomic_load(&m8, &am, 0);
return am;
}
AM8 s8;
+// CHECK-LABEL: define dso_local void @_Z6store8v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @s8, align 8
+// CHECK-NEXT: store atomic i64 [[TMP0]], ptr @m8 monotonic, align 8
+// CHECK-NEXT: ret void
+//
+// CORE2-LABEL: define dso_local void @_Z6store8v(
+// CORE2-SAME: ) #[[ATTR0]] {
+// CORE2-NEXT: [[ENTRY:.*:]]
+// CORE2-NEXT: [[TMP0:%.*]] = load i64, ptr @s8, align 8
+// CORE2-NEXT: store atomic i64 [[TMP0]], ptr @m8 monotonic, align 8
+// CORE2-NEXT: ret void
+//
void store8() {
- // CHECK-LABEL: @_Z6store8v
- // CHECK: store atomic i64 {{.*}} monotonic, align 8
- // CORE2-LABEL: @_Z6store8v
- // CORE2: store atomic i64 {{.*}} monotonic, align 8
__atomic_store(&m8, &s8, 0);
}
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z8cmpxchg8v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr @s8, align 8
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[AM]], align 8
+// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @m8, i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
+// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr @s8, align 8
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: ret i1 [[LOADEDV]]
+//
+// CORE2-LABEL: define dso_local noundef zeroext i1 @_Z8cmpxchg8v(
+// CORE2-SAME: ) #[[ATTR0]] {
+// CORE2-NEXT: [[ENTRY:.*:]]
+// CORE2-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
+// CORE2-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CORE2-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr @s8, align 8
+// CORE2-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[AM]], align 8
+// CORE2-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @m8, i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
+// CORE2-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
+// CORE2-NEXT: store i64 [[CMPXCHG_PREV]], ptr @s8, align 8
+// CORE2-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
+// CORE2-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
+// CORE2-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
+// CORE2-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CORE2-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CORE2-NEXT: ret i1 [[LOADEDV]]
+//
bool cmpxchg8() {
AM8 am;
- // CHECK-LABEL: @_Z8cmpxchg8v
- // CHECK: cmpxchg ptr {{.*}} monotonic, align 8
- // CORE2-LABEL: @_Z8cmpxchg8v
- // CORE2: cmpxchg ptr {{.*}} monotonic, align 8
return __atomic_compare_exchange(&m8, &s8, &am, 0, 0, 0);
}
@@ -39,30 +94,81 @@ struct alignas(16) AM16 {
};
AM16 m16;
+// CHECK-LABEL: define dso_local { i64, i64 } @_Z6load16v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @m16 monotonic, align 16
+// CHECK-NEXT: store i128 [[TMP0]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load { i64, i64 }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { i64, i64 } [[TMP1]]
+//
+// CORE2-LABEL: define dso_local { i64, i64 } @_Z6load16v(
+// CORE2-SAME: ) #[[ATTR0]] {
+// CORE2-NEXT: [[ENTRY:.*:]]
+// CORE2-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
+// CORE2-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @m16 monotonic, align 16
+// CORE2-NEXT: store i128 [[TMP0]], ptr [[RETVAL]], align 16
+// CORE2-NEXT: [[TMP1:%.*]] = load { i64, i64 }, ptr [[RETVAL]], align 16
+// CORE2-NEXT: ret { i64, i64 } [[TMP1]]
+//
AM16 load16() {
AM16 am;
- // CHECK-LABEL: @_Z6load16v
- // CHECK: load atomic i128, {{.*}} monotonic, align 16
- // CORE2-LABEL: @_Z6load16v
- // CORE2: load atomic i128, {{.*}} monotonic, align 16
__atomic_load(&m16, &am, 0);
return am;
}
AM16 s16;
+// CHECK-LABEL: define dso_local void @_Z7store16v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @s16, align 16
+// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @m16 monotonic, align 16
+// CHECK-NEXT: ret void
+//
+// CORE2-LABEL: define dso_local void @_Z7store16v(
+// CORE2-SAME: ) #[[ATTR0]] {
+// CORE2-NEXT: [[ENTRY:.*:]]
+// CORE2-NEXT: [[TMP0:%.*]] = load i128, ptr @s16, align 16
+// CORE2-NEXT: store atomic i128 [[TMP0]], ptr @m16 monotonic, align 16
+// CORE2-NEXT: ret void
+//
void store16() {
- // CHECK-LABEL: @_Z7store16v
- // CHECK: store atomic i128 {{.*}} monotonic, align 16
- // CORE2-LABEL: @_Z7store16v
- // CORE2: store atomic i128 {{.*}} monotonic, align 16
__atomic_store(&m16, &s16, 0);
}
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z9cmpxchg16v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr [[AM]], align 16
+// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_16:%.*]] = call i8 @__atomic_compare_exchange_16(ptr @m16, ptr @s16, i128 [[CMPXCHG_DESIRED]], i32 0, i32 0)
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_16]], 0
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-NEXT: ret i1 [[LOADEDV]]
+//
+// CORE2-LABEL: define dso_local noundef zeroext i1 @_Z9cmpxchg16v(
+// CORE2-SAME: ) #[[ATTR0]] {
+// CORE2-NEXT: [[ENTRY:.*:]]
+// CORE2-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
+// CORE2-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CORE2-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @s16, align 16
+// CORE2-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr [[AM]], align 16
+// CORE2-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @m16, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] monotonic monotonic, align 16
+// CORE2-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
+// CORE2-NEXT: store i128 [[CMPXCHG_PREV]], ptr @s16, align 16
+// CORE2-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
+// CORE2-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
+// CORE2-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
+// CORE2-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CORE2-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CORE2-NEXT: ret i1 [[LOADEDV]]
+//
bool cmpxchg16() {
AM16 am;
- // CHECK-LABEL: @_Z9cmpxchg16v
- // CHECK: cmpxchg ptr {{.*}} monotonic monotonic, align 16
- // CORE2-LABEL: @_Z9cmpxchg16v
- // CORE2: cmpxchg ptr {{.*}} monotonic monotonic, align 16
return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
}
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index a0a2ae4e63a879..a5e731a3fe30bc 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -210,8 +210,8 @@ void fi3(atomic_int *i, atomic_uint *ui) {
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") acquire acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr addrspace(5) [[CMP]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
// CHECK-NEXT: ret i1 [[LOADEDV]]
@@ -488,8 +488,8 @@ void atomic_init_foo()
// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") acquire monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
@@ -499,8 +499,8 @@ void atomic_init_foo()
// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("workgroup") seq_cst acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL2]], align 1
// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
// CHECK-NEXT: ret void
@@ -583,20 +583,20 @@ void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: [[MONOTONIC_FAIL]]:
// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1:.*]]
// CHECK: [[ACQUIRE_FAIL]]:
// CHECK-NEXT: [[CMPXCHG_PAIR2:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV3:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS4:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV3]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS4:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]]
// CHECK: [[SEQCST_FAIL]]:
// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]]
// CHECK: [[ATOMIC_CONTINUE1]]:
// CHECK-NEXT: [[CMPXCGH_SUCCESS:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS]], %[[MONOTONIC_FAIL]] ], [ [[CMPXCHG_SUCCESS4]], %[[ACQUIRE_FAIL]] ], [ [[CMPXCHG_SUCCESS7]], %[[SEQCST_FAIL]] ]
@@ -604,20 +604,20 @@ void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: [[MONOTONIC_FAIL10]]:
// CHECK-NEXT: [[CMPXCHG_PAIR14:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV15:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS16:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV15]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS16:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13:.*]]
// CHECK: [[ACQUIRE_FAIL11]]:
// CHECK-NEXT: [[CMPXCHG_PAIR17:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV18:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS19:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV18]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS19:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13]]
// CHECK: [[SEQCST_FAIL12]]:
// CHECK-NEXT: [[CMPXCHG_PAIR20:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV21:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS22:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV21]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS22:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13]]
// CHECK: [[ATOMIC_CONTINUE13]]:
// CHECK-NEXT: [[CMPXCGH_SUCCESS23:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS16]], %[[MONOTONIC_FAIL10]] ], [ [[CMPXCHG_SUCCESS19]], %[[ACQUIRE_FAIL11]] ], [ [[CMPXCHG_SUCCESS22]], %[[SEQCST_FAIL12]] ]
@@ -625,20 +625,20 @@ void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: [[MONOTONIC_FAIL26]]:
// CHECK-NEXT: [[CMPXCHG_PAIR30:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV31:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS32:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV31]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS32:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29:.*]]
// CHECK: [[ACQUIRE_FAIL27]]:
// CHECK-NEXT: [[CMPXCHG_PAIR33:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV34:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS35:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV34]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS35:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29]]
// CHECK: [[SEQCST_FAIL28]]:
// CHECK-NEXT: [[CMPXCHG_PAIR36:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV37:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS38:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV37]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS38:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29]]
// CHECK: [[ATOMIC_CONTINUE29]]:
// CHECK-NEXT: [[CMPXCGH_SUCCESS39:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS32]], %[[MONOTONIC_FAIL26]] ], [ [[CMPXCHG_SUCCESS35]], %[[ACQUIRE_FAIL27]] ], [ [[CMPXCHG_SUCCESS38]], %[[SEQCST_FAIL28]] ]
@@ -646,20 +646,20 @@ void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: [[MONOTONIC_FAIL42]]:
// CHECK-NEXT: [[CMPXCHG_PAIR46:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV47:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS48:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV47]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS48:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45:.*]]
// CHECK: [[ACQUIRE_FAIL43]]:
// CHECK-NEXT: [[CMPXCHG_PAIR49:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV50:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS51:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV50]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS51:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45]]
// CHECK: [[SEQCST_FAIL44]]:
// CHECK-NEXT: [[CMPXCHG_PAIR52:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV53:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS54:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV53]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS54:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45]]
// CHECK: [[ATOMIC_CONTINUE45]]:
// CHECK-NEXT: [[CMPXCGH_SUCCESS55:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS48]], %[[MONOTONIC_FAIL42]] ], [ [[CMPXCHG_SUCCESS51]], %[[ACQUIRE_FAIL43]] ], [ [[CMPXCHG_SUCCESS54]], %[[SEQCST_FAIL44]] ]
@@ -667,20 +667,20 @@ void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: [[MONOTONIC_FAIL58]]:
// CHECK-NEXT: [[CMPXCHG_PAIR62:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst monotonic, align 4
// CHECK-NEXT: [[CMPXCHG_PREV63:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS64:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV63]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS64:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61:.*]]
// CHECK: [[ACQUIRE_FAIL59]]:
// CHECK-NEXT: [[CMPXCHG_PAIR65:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst acquire, align 4
// CHECK-NEXT: [[CMPXCHG_PREV66:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS67:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV66]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS67:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61]]
// CHECK: [[SEQCST_FAIL60]]:
// CHECK-NEXT: [[CMPXCHG_PAIR68:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst seq_cst, align 4
// CHECK-NEXT: [[CMPXCHG_PREV69:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 0
-// CHECK-NEXT: [[CMPXCHG_SUCCESS70:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 1
// CHECK-NEXT: store i32 [[CMPXCHG_PREV69]], ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CMPXCHG_SUCCESS70:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 1
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61]]
// CHECK: [[ATOMIC_CONTINUE61]]:
// CHECK-NEXT: [[CMPXCGH_SUCCESS71:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS64]], %[[MONOTONIC_FAIL58]] ], [ [[CMPXCHG_SUCCESS67]], %[[ACQUIRE_FAIL59]] ], [ [[CMPXCHG_SUCCESS70]], %[[SEQCST_FAIL60]] ]
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 7c68d18e93bdb1..245fc398186e3c 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -203,26 +203,31 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
processMemorder(FailureMemorder);
auto [ScopeConst, ScopeVal] = processScope(Scope);
- if (SuccessMemorderConst && FailureMemorderConst) {
- // https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
- //
- // [failure_memorder] This memory order cannot be __ATOMIC_RELEASE nor
- // __ATOMIC_ACQ_REL. It also cannot be a stronger order than that
- // specified by success_memorder.
- //
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // Both ordering parameters must be at least monotonic, the failure
- // ordering cannot be either release or acq_rel.
- //
- // Release/Acquire exception because of test/CodeGen/atomic-ops.c (function
- // "generalWeakness") regression test.
- assert(*FailureMemorderConst != AtomicOrdering::Release);
- assert(*FailureMemorderConst != AtomicOrdering::AcquireRelease);
- assert(
- isAtLeastOrStrongerThan(*SuccessMemorderConst, *FailureMemorderConst) ||
- (*SuccessMemorderConst == AtomicOrdering::Release &&
- *FailureMemorderConst == AtomicOrdering::Acquire));
+ // Fix malformed inputs. We do not want to emit illegal IR.
+ //
+ // https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+ //
+ // [failure_memorder] This memory order cannot be __ATOMIC_RELEASE nor
+ // __ATOMIC_ACQ_REL. It also cannot be a stronger order than that
+ // specified by success_memorder.
+ //
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // Both ordering parameters must be at least monotonic, the failure
+ // ordering cannot be either release or acq_rel.
+ //
+ if (FailureMemorderConst &&
+ ((*FailureMemorderConst == AtomicOrdering::Release) ||
+ (*FailureMemorderConst == AtomicOrdering::AcquireRelease))) {
+ // Fall back to monotonic atomic when illegal value is passed. As with the
+ // dynamic case below, it is an arbitrary choice.
+ FailureMemorderConst = AtomicOrdering::Monotonic;
+ }
+ if (FailureMemorderConst && SuccessMemorderConst &&
+ !isAtLeastOrStrongerThan(*SuccessMemorderConst, *FailureMemorderConst)) {
+ // Make SuccessMemorder at least as strong as FailureMemorder
+ SuccessMemorderConst =
+ getMergedAtomicOrdering(*SuccessMemorderConst, *FailureMemorderConst);
}
// https://llvm.org/docs/LangRef.html#cmpxchg-instruction
@@ -510,27 +515,28 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
return GenWeakSwitch();
}
- // Fallback to a libcall function. From here on IsWeak/IsVolatile is ignored.
- // IsWeak is assumed to be false and volatile does not apply to function
- // calls.
+ // Fall back to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile does
+ // not apply to function calls.
// FIXME: Some AMDGCN regression tests the addrspace, but
// __atomic_compare_exchange by definition is addrsspace(0) and
// emitAtomicCompareExchange will complain about it.
- if (Ptr->getType()->getPointerAddressSpace())
+ if (Ptr->getType()->getPointerAddressSpace() ||
+ ExpectedPtr->getType()->getPointerAddressSpace() ||
+ DesiredPtr->getType()->getPointerAddressSpace())
return Builder.getInt1(false);
- assert(ScopeConst && *ScopeConst == SyncScope::System && !ScopeVal &&
- "Synchronization scopes not supported by libcall functions");
-
if (CanUseSizedLibcall && AllowSizedLibcall) {
LoadInst *DesiredVal =
- Builder.CreateLoad(CoercedTy, DesiredPtr, "cmpxchg.desired");
+ Builder.CreateLoad(IntegerType::get(Ctx, PreferredSize * 8), DesiredPtr,
+ "cmpxchg.desired");
Value *SuccessResult = emitAtomicCompareExchangeN(
PreferredSize, Ptr, ExpectedPtr, DesiredVal, SuccessMemorderCABI,
FailureMemorderCABI, Builder, DL, TLI);
if (SuccessResult) {
- assert(SuccessResult && "Must be able to emit libcall functions");
Value *SuccessBool =
Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
Builder.getInt8(0), "cmpxchg.success");
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
index 23da2e8c15efe9..5e989c8614a0c6 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
@@ -79,7 +79,7 @@ define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
@@ -89,7 +89,6 @@ define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -106,7 +105,7 @@ define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sle i128 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
@@ -116,7 +115,6 @@ define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -133,7 +131,7 @@ define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i128 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
@@ -143,7 +141,6 @@ define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -160,7 +157,7 @@ define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
@@ -170,7 +167,6 @@ define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value)
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -188,12 +184,9 @@ define i128 @test_cmpxchg_i128_global(ptr addrspace(1) %out, i128 %in, i128 %old
; CHECK-NEXT: store i128 [[OLD:%.*]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
; CHECK-NEXT: store i128 [[IN:%.*]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP2]], ptr addrspace(5) [[TMP1]], i128 [[IN:%.*]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } [[TMP1]], i1 false, 1
; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i128, i1 } [[TMP5]], i1 [[TMP3]], 1
-; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP6]], 0
; CHECK-NEXT: ret i128 [[EXTRACT]]
;
%gep = getelementptr i128, ptr addrspace(1) %out, i64 4
>From a5dbc1df52cd11004a26fa6d4a2523760cf5bf34 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 5 Nov 2024 15:28:45 +0100
Subject: [PATCH 03/17] undo uses in CGAtomic and AtomicExpandPass
---
clang/include/clang/CodeGen/BackendUtil.h | 10 -
clang/include/clang/CodeGen/ModuleBuilder.h | 2 -
.../include/clang/Frontend/CompilerInstance.h | 5 -
clang/lib/CodeGen/BackendConsumer.h | 4 +-
clang/lib/CodeGen/BackendUtil.cpp | 8 +-
clang/lib/CodeGen/CGAtomic.cpp | 246 +-
clang/lib/CodeGen/CMakeLists.txt | 1 -
clang/lib/CodeGen/CodeGenAction.cpp | 11 +-
clang/lib/CodeGen/CodeGenFunction.cpp | 13 -
clang/lib/CodeGen/CodeGenFunction.h | 10 -
clang/lib/CodeGen/CodeGenModule.cpp | 16 +-
clang/lib/CodeGen/CodeGenModule.h | 10 +-
clang/lib/CodeGen/ModuleBuilder.cpp | 24 +-
.../CodeGen/ObjectFilePCHContainerWriter.cpp | 9 +-
clang/lib/Frontend/CMakeLists.txt | 1 -
clang/lib/Frontend/CompilerInstance.cpp | 32 -
.../PowerPC/builtins-ppc-xlcompat-cas.c | 42 +-
clang/test/CodeGen/PowerPC/quadword-atomics.c | 29 +-
.../SystemZ/gnu-atomic-builtins-i128-16Al.c | 34 +-
.../SystemZ/gnu-atomic-builtins-i128-8Al.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i16.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i32.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i64.c | 34 +-
.../CodeGen/SystemZ/gnu-atomic-builtins-i8.c | 34 +-
.../test/CodeGen/X86/x86-atomic-long_double.c | 36 +-
clang/test/CodeGen/atomic-ops.c | 432 ++-
clang/test/CodeGen/atomic_ops.c | 356 +--
clang/test/CodeGen/c11atomics-ios.c | 207 +-
clang/test/CodeGen/c11atomics.c | 754 ++----
.../CodeGen/sanitize-atomic-int-overflow.c | 21 +-
clang/test/CodeGen/stack-arg-probe.c | 2 +
clang/test/CodeGenCUDA/atomic-ops.cu | 2355 ++---------------
clang/test/CodeGenCXX/atomic-inline.cpp | 154 +-
clang/test/CodeGenOpenCL/atomic-ops.cl | 823 ++----
llvm/include/llvm/IR/RuntimeLibcalls.def | 1 -
llvm/include/llvm/MC/TargetRegistry.h | 7 +-
llvm/include/llvm/Support/AtomicOrdering.h | 8 +-
llvm/include/llvm/Target/TargetMachine.h | 4 -
.../llvm/Transforms/Utils/BuildLibCalls.h | 7 +-
llvm/lib/Analysis/TargetLibraryInfo.cpp | 5 -
llvm/lib/CodeGen/AtomicExpandPass.cpp | 72 +-
llvm/lib/CodeGen/LLVMTargetMachine.cpp | 4 -
llvm/lib/MC/TargetRegistry.cpp | 16 -
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 2 +
.../AMDGPU/expand-atomic-f64-agent.ll | 198 +-
.../AMDGPU/expand-atomic-f64-system.ll | 198 +-
.../AMDGPU/expand-atomic-fp128.ll | 108 +-
.../AtomicExpand/AMDGPU/expand-atomic-i128.ll | 135 +-
.../AMDGPU/expand-atomicrmw-fp-vector.ll | 264 +-
.../AtomicExpand/PowerPC/cmpxchg.ll | 22 +-
.../AtomicExpand/RISCV/atomicrmw-fp.ll | 58 +-
.../Transforms/AtomicExpand/SPARC/libcalls.ll | 283 +-
52 files changed, 2518 insertions(+), 4691 deletions(-)
diff --git a/clang/include/clang/CodeGen/BackendUtil.h b/clang/include/clang/CodeGen/BackendUtil.h
index f38166f32afdb9..fc8ed4f011f922 100644
--- a/clang/include/clang/CodeGen/BackendUtil.h
+++ b/clang/include/clang/CodeGen/BackendUtil.h
@@ -19,7 +19,6 @@ namespace llvm {
template <typename T> class IntrusiveRefCntPtr;
class Module;
class MemoryBufferRef;
- class TargetOptions;
namespace vfs {
class FileSystem;
} // namespace vfs
@@ -55,15 +54,6 @@ namespace clang {
void EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts,
DiagnosticsEngine &Diags);
-
- std::optional<llvm::CodeModel::Model>
- getCodeModel(const CodeGenOptions &CodeGenOpts);
-
- bool initTargetOptions(DiagnosticsEngine &Diags, llvm::TargetOptions &Options,
- const CodeGenOptions &CodeGenOpts,
- const clang::TargetOptions &TargetOpts,
- const LangOptions &LangOpts,
- const HeaderSearchOptions &HSOpts);
}
#endif
diff --git a/clang/include/clang/CodeGen/ModuleBuilder.h b/clang/include/clang/CodeGen/ModuleBuilder.h
index 382e26353719a7..59b9840d02e086 100644
--- a/clang/include/clang/CodeGen/ModuleBuilder.h
+++ b/clang/include/clang/CodeGen/ModuleBuilder.h
@@ -22,7 +22,6 @@ namespace llvm {
class LLVMContext;
class Module;
class StringRef;
- class TargetMachine;
namespace vfs {
class FileSystem;
@@ -113,7 +112,6 @@ CodeGenerator *CreateLLVMCodeGen(DiagnosticsEngine &Diags,
const PreprocessorOptions &PreprocessorOpts,
const CodeGenOptions &CGO,
llvm::LLVMContext &C,
- llvm::TargetMachine *TM = nullptr,
CoverageSourceInfo *CoverageInfo = nullptr);
} // end namespace clang
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 9fd15ecda4dfbe..3464654284f199 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -35,7 +35,6 @@ namespace llvm {
class raw_fd_ostream;
class Timer;
class TimerGroup;
-class TargetMachine;
}
namespace clang {
@@ -87,8 +86,6 @@ class CompilerInstance : public ModuleLoader {
/// The target being compiled for.
IntrusiveRefCntPtr<TargetInfo> Target;
- std::unique_ptr<llvm::TargetMachine> TM;
-
/// Auxiliary Target info.
IntrusiveRefCntPtr<TargetInfo> AuxTarget;
@@ -360,8 +357,6 @@ class CompilerInstance : public ModuleLoader {
return Invocation->getTargetOpts();
}
- llvm::TargetMachine *getTargetMachine() const { return TM.get(); }
-
/// @}
/// @name Diagnostics Engine
/// @{
diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h
index d7eecc265f896c..a023d29cbd1d73 100644
--- a/clang/lib/CodeGen/BackendConsumer.h
+++ b/clang/lib/CodeGen/BackendConsumer.h
@@ -17,7 +17,6 @@
namespace llvm {
class DiagnosticInfoDontCall;
- class TargetMachine;
}
namespace clang {
@@ -79,7 +78,6 @@ class BackendConsumer : public ASTConsumer {
const std::string &InFile,
SmallVector<LinkModule, 4> LinkModules,
std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C,
- llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo = nullptr);
// This constructor is used in installing an empty BackendConsumer
@@ -92,7 +90,7 @@ class BackendConsumer : public ASTConsumer {
const CodeGenOptions &CodeGenOpts,
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules,
- llvm::LLVMContext &C, llvm::TargetMachine *TM,
+ llvm::LLVMContext &C,
CoverageSourceInfo *CoverageInfo = nullptr);
llvm::Module *getModule() const;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 03c1e5969099b6..e765bbf637a661 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -289,8 +289,8 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
return false;
}
-std::optional<llvm::CodeModel::Model>
-clang::getCodeModel(const CodeGenOptions &CodeGenOpts) {
+static std::optional<llvm::CodeModel::Model>
+getCodeModel(const CodeGenOptions &CodeGenOpts) {
unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
.Case("tiny", llvm::CodeModel::Tiny)
.Case("small", llvm::CodeModel::Small)
@@ -321,7 +321,7 @@ static bool actionRequiresCodeGen(BackendAction Action) {
Action != Backend_EmitLL;
}
-bool clang::initTargetOptions(DiagnosticsEngine &Diags,
+static bool initTargetOptions(DiagnosticsEngine &Diags,
llvm::TargetOptions &Options,
const CodeGenOptions &CodeGenOpts,
const clang::TargetOptions &TargetOpts,
@@ -561,7 +561,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
- std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
+ std::optional<CodeGenOptLevel> OptLevelOrNone =
CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel);
assert(OptLevelOrNone && "Invalid optimization level!");
CodeGenOptLevel OptLevel = *OptLevelOrNone;
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 9fb6ecee50a6d8..fbe9569e50ef63 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -22,7 +22,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Transforms/Utils/BuildBuiltins.h"
using namespace clang;
using namespace CodeGen;
@@ -134,9 +133,7 @@ namespace {
QualType getValueType() const { return ValueTy; }
CharUnits getAtomicAlignment() const { return AtomicAlign; }
uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
- uint64_t getAtomicSizeInBytes() const { return AtomicSizeInBits / 8; }
uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
- uint64_t getValueSizeInBytes() const { return ValueSizeInBits / 8; }
TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
bool shouldUseLibcall() const { return UseLibcall; }
const LValue &getAtomicLValue() const { return LVal; }
@@ -377,6 +374,130 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
return true;
}
+static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
+ Address Dest, Address Ptr,
+ Address Val1, Address Val2,
+ uint64_t Size,
+ llvm::AtomicOrdering SuccessOrder,
+ llvm::AtomicOrdering FailureOrder,
+ llvm::SyncScope::ID Scope) {
+ // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
+ llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
+ llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
+
+ llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
+ Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
+ Pair->setVolatile(E->isVolatile());
+ Pair->setWeak(IsWeak);
+
+ // Cmp holds the result of the compare-exchange operation: true on success,
+ // false on failure.
+ llvm::Value *Old = CGF.Builder.CreateExtractValue(Pair, 0);
+ llvm::Value *Cmp = CGF.Builder.CreateExtractValue(Pair, 1);
+
+ // This basic block is used to hold the store instruction if the operation
+ // failed.
+ llvm::BasicBlock *StoreExpectedBB =
+ CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);
+
+ // This basic block is the exit point of the operation, we should end up
+ // here regardless of whether or not the operation succeeded.
+ llvm::BasicBlock *ContinueBB =
+ CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
+
+ // Update Expected if Expected isn't equal to Old, otherwise branch to the
+ // exit point.
+ CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);
+
+ CGF.Builder.SetInsertPoint(StoreExpectedBB);
+ // Update the memory at Expected with Old's value.
+ CGF.Builder.CreateStore(Old, Val1);
+ // Finally, branch to the exit point.
+ CGF.Builder.CreateBr(ContinueBB);
+
+ CGF.Builder.SetInsertPoint(ContinueBB);
+ // Update the memory at Dest with Cmp's value.
+ CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
+}
+
+/// Given an ordering required on success, emit all possible cmpxchg
+/// instructions to cope with the provided (but possibly only dynamically known)
+/// FailureOrder.
+static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
+ bool IsWeak, Address Dest, Address Ptr,
+ Address Val1, Address Val2,
+ llvm::Value *FailureOrderVal,
+ uint64_t Size,
+ llvm::AtomicOrdering SuccessOrder,
+ llvm::SyncScope::ID Scope) {
+ llvm::AtomicOrdering FailureOrder;
+ if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
+ auto FOS = FO->getSExtValue();
+ if (!llvm::isValidAtomicOrderingCABI(FOS))
+ FailureOrder = llvm::AtomicOrdering::Monotonic;
+ else
+ switch ((llvm::AtomicOrderingCABI)FOS) {
+ case llvm::AtomicOrderingCABI::relaxed:
+ // 31.7.2.18: "The failure argument shall not be memory_order_release
+ // nor memory_order_acq_rel". Fallback to monotonic.
+ case llvm::AtomicOrderingCABI::release:
+ case llvm::AtomicOrderingCABI::acq_rel:
+ FailureOrder = llvm::AtomicOrdering::Monotonic;
+ break;
+ case llvm::AtomicOrderingCABI::consume:
+ case llvm::AtomicOrderingCABI::acquire:
+ FailureOrder = llvm::AtomicOrdering::Acquire;
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ }
+ // Prior to c++17, "the failure argument shall be no stronger than the
+ // success argument". This condition has been lifted and the only
+ // precondition is 31.7.2.18. Effectively treat this as a DR and skip
+ // language version checks.
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ FailureOrder, Scope);
+ return;
+ }
+
+ // Create all the relevant BB's
+ auto *MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
+ auto *AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
+ auto *SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
+ auto *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);
+
+ // MonotonicBB is arbitrarily chosen as the default case; in practice, this
+ // doesn't matter unless someone is crazy enough to use something that
+ // doesn't fold to a constant for the ordering.
+ llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB);
+ // Implemented as acquire, since it's the closest in LLVM.
+ SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
+ AcquireBB);
+ SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
+ AcquireBB);
+ SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
+ SeqCstBB);
+
+ // Emit all the different atomics
+ CGF.Builder.SetInsertPoint(MonotonicBB);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
+ Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
+ CGF.Builder.CreateBr(ContBB);
+
+ CGF.Builder.SetInsertPoint(AcquireBB);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ llvm::AtomicOrdering::Acquire, Scope);
+ CGF.Builder.CreateBr(ContBB);
+
+ CGF.Builder.SetInsertPoint(SeqCstBB);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+ CGF.Builder.CreateBr(ContBB);
+
+ CGF.Builder.SetInsertPoint(ContBB);
+}
+
/// Duplicate the atomic min/max operation in conventional IR for the builtin
/// variants that return the new rather than the original value.
static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
@@ -410,66 +531,53 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
bool PostOpMinMax = false;
unsigned PostOp = 0;
- bool IsWeakOp = false;
switch (E->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled!");
+ case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
+ FailureOrder, Size, Order, Scope);
+ return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
- IsWeakOp = true;
- [[fallthrough]];
- case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
- case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
- case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: {
- llvm::Value *LLVMPtr = Ptr.emitRawPointer(CGF);
- llvm::Value *Expected = Val1.emitRawPointer(CGF);
- llvm::Value *Desired = Val2.emitRawPointer(CGF);
- llvm::Align Align = Ptr.getAlignment().getAsAlign();
-
- SmallVector<std::pair<uint32_t, StringRef>> SupportedScopes;
- StringRef DefaultScope;
- if (std::unique_ptr<AtomicScopeModel> ScopeModel = E->getScopeModel()) {
- for (unsigned S : ScopeModel->getRuntimeValues())
- SupportedScopes.emplace_back(S, getAsString(ScopeModel->map(S)));
- DefaultScope =
- getAsString(ScopeModel->map(ScopeModel->getFallBackValue()));
- }
-
- llvm::emitAtomicCompareExchangeBuiltin(
- LLVMPtr, Expected, Desired, IsWeakOp, E->isVolatile(), Order,
- FailureOrder, Scope, Expected, Ptr.getElementType(), {}, {}, Align,
- CGF.Builder, CGF.CGM.getDataLayout(), CGF.getTargetLibraryInfo(),
- CGF.getTargetLowering(), SupportedScopes, DefaultScope);
+ emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
+ FailureOrder, Size, Order, Scope);
return;
- }
-
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__scoped_atomic_compare_exchange:
case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
- llvm::Value *LLVMPtr = Ptr.emitRawPointer(CGF);
- llvm::Value *Expected = Val1.emitRawPointer(CGF);
- llvm::Value *Desired = Val2.emitRawPointer(CGF);
- llvm::Align Align = Ptr.getAlignment().getAsAlign();
-
- SmallVector<std::pair<uint32_t, StringRef>> SupportedScopes;
- StringRef DefaultScope;
- if (std::unique_ptr<AtomicScopeModel> ScopeModel = E->getScopeModel()) {
- for (unsigned S : ScopeModel->getRuntimeValues())
- SupportedScopes.emplace_back(S, getAsString(ScopeModel->map(S)));
- DefaultScope =
- getAsString(ScopeModel->map(ScopeModel->getFallBackValue()));
+ if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
+ emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
+ Val1, Val2, FailureOrder, Size, Order, Scope);
+ } else {
+ // Create all the relevant BB's
+ llvm::BasicBlock *StrongBB =
+ CGF.createBasicBlock("cmpxchg.strong", CGF.CurFn);
+ llvm::BasicBlock *WeakBB = CGF.createBasicBlock("cmxchg.weak", CGF.CurFn);
+ llvm::BasicBlock *ContBB =
+ CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
+
+ llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(IsWeak, WeakBB);
+ SI->addCase(CGF.Builder.getInt1(false), StrongBB);
+
+ CGF.Builder.SetInsertPoint(StrongBB);
+ emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
+ FailureOrder, Size, Order, Scope);
+ CGF.Builder.CreateBr(ContBB);
+
+ CGF.Builder.SetInsertPoint(WeakBB);
+ emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
+ FailureOrder, Size, Order, Scope);
+ CGF.Builder.CreateBr(ContBB);
+
+ CGF.Builder.SetInsertPoint(ContBB);
}
-
- llvm::Value *SuccessVal = llvm::emitAtomicCompareExchangeBuiltin(
- LLVMPtr, Expected, Desired, IsWeak, E->isVolatile(), Order,
- FailureOrder, Scope, Expected, Ptr.getElementType(), {}, {}, Align,
- CGF.Builder, CGF.CGM.getDataLayout(), CGF.getTargetLibraryInfo(),
- CGF.getTargetLowering(), SupportedScopes, DefaultScope);
- CGF.EmitStoreOfScalar(SuccessVal, CGF.MakeAddrLValue(Dest, E->getType()));
return;
}
case AtomicExpr::AO__c11_atomic_load:
@@ -1571,23 +1679,31 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
llvm::AtomicOrdering Failure, bool IsWeak) {
- llvm::Value *Ptr = getAtomicPointer();
- Address ExpectedAddr = materializeRValue(Expected);
- llvm::Value *ExpectedPtr = ExpectedAddr.emitRawPointer(CGF);
- llvm::Value *DesiredPtr = materializeRValue(Desired).emitRawPointer(CGF);
- Address PrevAddr = CreateTempAlloca();
- llvm::Value *PrevPtr = PrevAddr.emitRawPointer(CGF);
-
- llvm::Value *SuccessResult = llvm::emitAtomicCompareExchangeBuiltin(
- Ptr, ExpectedPtr, DesiredPtr, IsWeak, LVal.isVolatileQualified(), Success,
- Failure, PrevPtr, getAtomicAddress().getElementType(),
- getValueSizeInBytes(), getAtomicSizeInBytes(),
- getAtomicAlignment().getAsAlign(), CGF.Builder, CGF.CGM.getDataLayout(),
- CGF.getTargetLibraryInfo(), CGF.getTargetLowering());
+ // Check whether we should use a library call.
+ if (shouldUseLibcall()) {
+ // Produce a source address.
+ Address ExpectedAddr = materializeRValue(Expected);
+ llvm::Value *ExpectedPtr = ExpectedAddr.emitRawPointer(CGF);
+ llvm::Value *DesiredPtr = materializeRValue(Desired).emitRawPointer(CGF);
+ auto *Res = EmitAtomicCompareExchangeLibcall(ExpectedPtr, DesiredPtr,
+ Success, Failure);
+ return std::make_pair(
+ convertAtomicTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
+ SourceLocation(), /*AsValue=*/false),
+ Res);
+ }
+
+ // If we've got a scalar value of the right size, try to avoid going
+ // through memory.
+ auto *ExpectedVal = convertRValueToInt(Expected, /*CmpXchg=*/true);
+ auto *DesiredVal = convertRValueToInt(Desired, /*CmpXchg=*/true);
+ auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
+ Failure, IsWeak);
return std::make_pair(
- convertAtomicTempToRValue(PrevAddr, AggValueSlot::ignored(),
- SourceLocation(), /*AsValue=*/false),
- SuccessResult);
+ ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
+ SourceLocation(), /*AsValue=*/false,
+ /*CmpXchg=*/true),
+ Res.second);
}
static void
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index e2a21420d68bb9..deb7b27266d736 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -27,7 +27,6 @@ set(LLVM_LINK_COMPONENTS
Passes
ProfileData
ScalarOpts
- SelectionDAG
Support
Target
TargetParser
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 4e8e77b7e38c81..e87226e60297c0 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -116,14 +116,14 @@ BackendConsumer::BackendConsumer(
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
const std::string &InFile, SmallVector<LinkModule, 4> LinkModules,
std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- llvm::TargetMachine *TM, CoverageSourceInfo *CoverageInfo)
+ CoverageSourceInfo *CoverageInfo)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
- PPOpts, CodeGenOpts, C, TM, CoverageInfo)),
+ PPOpts, CodeGenOpts, C, CoverageInfo)),
LinkModules(std::move(LinkModules)) {
TimerIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
@@ -140,14 +140,14 @@ BackendConsumer::BackendConsumer(
const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules,
- LLVMContext &C, llvm::TargetMachine *TM, CoverageSourceInfo *CoverageInfo)
+ LLVMContext &C, CoverageSourceInfo *CoverageInfo)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
Context(nullptr), FS(VFS),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts,
- CodeGenOpts, C, TM, CoverageInfo)),
+ CodeGenOpts, C, CoverageInfo)),
LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
TimerIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
@@ -1019,8 +1019,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile),
- std::move(LinkModules), std::move(OS), *VMContext, CI.getTargetMachine(),
- CoverageInfo));
+ std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
BEConsumer = Result.get();
// Enable generating macro debug info only when debug info is not disabled and
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index c99f068cbb1246..af201554898f31 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -36,8 +36,6 @@
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -48,7 +46,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/xxhash.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>
@@ -109,12 +106,6 @@ CodeGenFunction::~CodeGenFunction() {
CGM.getOpenMPRuntime().getOMPBuilder().finalize(CurFn);
}
-const llvm::TargetLowering *CodeGenFunction::getTargetLowering() const {
- if (!TSI)
- return nullptr;
- return TSI->getTargetLowering();
-}
-
// Map the LangOption for exception behavior into
// the corresponding enum in the IR.
llvm::fp::ExceptionBehavior
@@ -1528,10 +1519,6 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
if (checkIfFunctionMustProgress())
CurFn->addFnAttr(llvm::Attribute::MustProgress);
- TLI.reset(new llvm::TargetLibraryInfo(CGM.getTargetLibraryInfoImpl(), Fn));
- if (const llvm::TargetMachine *TM = CGM.getTargetMachine())
- TSI = TM->getSubtargetImpl(*Fn);
-
// Generate the body of the function.
PGO.assignRegionCounters(GD, CurFn);
if (isa<CXXDestructorDecl>(FD))
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 5043b501b5f618..1c0a0e117e5607 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -53,7 +53,6 @@ class SwitchInst;
class Twine;
class Value;
class CanonicalLoopInfo;
-class TargetSubtargetInfo;
}
namespace clang {
@@ -286,9 +285,6 @@ class CodeGenFunction : public CodeGenTypeCache {
CodeGenModule &CGM; // Per-module state.
const TargetInfo &Target;
- std::unique_ptr<llvm::TargetLibraryInfo> TLI;
- // std::unique_ptr<llvm::MCSubtarget> STM;
- const llvm::TargetSubtargetInfo *TSI = nullptr;
// For EH/SEH outlined funclets, this field points to parent's CGF
CodeGenFunction *ParentCGF = nullptr;
@@ -2154,12 +2150,6 @@ class CodeGenFunction : public CodeGenTypeCache {
const LangOptions &getLangOpts() const { return CGM.getLangOpts(); }
- const llvm::TargetLibraryInfo *getTargetLibraryInfo() const {
- return TLI.get();
- }
-
- const llvm::TargetLowering *getTargetLowering() const;
-
/// Returns a pointer to the function's exception object and selector slot,
/// which is assigned in every landing pad.
Address getExceptionSlot();
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index effc5ed6ebee14..31f6632df9f27d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -71,7 +71,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/RISCVISAInfo.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
@@ -334,17 +333,18 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return *TheTargetCodeGenInfo;
}
-CodeGenModule::CodeGenModule(
- ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
- const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO,
- const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags,
- llvm::TargetLibraryInfoImpl &TLII, const llvm::TargetMachine *TM,
- CoverageSourceInfo *CoverageInfo)
+CodeGenModule::CodeGenModule(ASTContext &C,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+ const HeaderSearchOptions &HSO,
+ const PreprocessorOptions &PPO,
+ const CodeGenOptions &CGO, llvm::Module &M,
+ DiagnosticsEngine &diags,
+ CoverageSourceInfo *CoverageInfo)
: Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
VMContext(M.getContext()), Types(*this), VTables(*this),
- SanitizerMD(new SanitizerMetadata(*this)), TLII(TLII), TM(TM) {
+ SanitizerMD(new SanitizerMetadata(*this)) {
// Initialize the type cache.
llvm::LLVMContext &LLVMContext = M.getContext();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index e11e653cf3f676..284bba823baeb4 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -49,8 +49,6 @@ class DataLayout;
class FunctionType;
class LLVMContext;
class IndexedInstrProfReader;
-class TargetLowering;
-class TargetMachine;
namespace vfs {
class FileSystem;
@@ -309,9 +307,6 @@ class CodeGenModule : public CodeGenTypeCache {
const CodeGenOptions &CodeGenOpts;
unsigned NumAutoVarInit = 0;
llvm::Module &TheModule;
- llvm::TargetLibraryInfoImpl &TLII;
- const llvm::TargetMachine *TM;
-
DiagnosticsEngine &Diags;
const TargetInfo &Target;
std::unique_ptr<CGCXXABI> ABI;
@@ -637,8 +632,7 @@ class CodeGenModule : public CodeGenTypeCache {
const HeaderSearchOptions &headersearchopts,
const PreprocessorOptions &ppopts,
const CodeGenOptions &CodeGenOpts, llvm::Module &M,
- DiagnosticsEngine &Diags, llvm::TargetLibraryInfoImpl &TLII,
- const llvm::TargetMachine *TM,
+ DiagnosticsEngine &Diags,
CoverageSourceInfo *CoverageInfo = nullptr);
~CodeGenModule();
@@ -777,8 +771,6 @@ class CodeGenModule : public CodeGenTypeCache {
const llvm::DataLayout &getDataLayout() const {
return TheModule.getDataLayout();
}
- llvm::TargetLibraryInfoImpl &getTargetLibraryInfoImpl() const { return TLII; }
- const llvm::TargetMachine *getTargetMachine() const { return TM; }
const TargetInfo &getTarget() const { return Target; }
const llvm::Triple &getTriple() const { return Target.getTriple(); }
bool supportsCOMDAT() const;
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index a698f19f88e548..d4e0ab0339a8b0 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -20,13 +20,10 @@
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Target/TargetMachine.h"
#include <memory>
using namespace clang;
@@ -66,8 +63,6 @@ namespace {
protected:
std::unique_ptr<llvm::Module> M;
std::unique_ptr<CodeGen::CodeGenModule> Builder;
- std::unique_ptr<llvm::TargetLibraryInfoImpl> TLII;
- const llvm::TargetMachine *TM;
private:
SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs;
@@ -84,12 +79,12 @@ namespace {
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
const HeaderSearchOptions &HSO,
const PreprocessorOptions &PPO, const CodeGenOptions &CGO,
- llvm::LLVMContext &C, llvm::TargetMachine *TM,
+ llvm::LLVMContext &C,
CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(diags), Ctx(nullptr), FS(std::move(FS)), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
CoverageInfo(CoverageInfo),
- M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)), TM(TM) {
+ M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)) {
C.setDiscardValueNames(CGO.DiscardValueNames);
}
@@ -156,8 +151,7 @@ namespace {
void Initialize(ASTContext &Context) override {
Ctx = &Context;
- llvm::Triple TargetTriple = Ctx->getTargetInfo().getTriple();
- M->setTargetTriple(TargetTriple.getTriple());
+ M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion();
if (!SDKVersion.empty())
@@ -167,12 +161,9 @@ namespace {
if (auto TVSDKVersion =
Ctx->getTargetInfo().getDarwinTargetVariantSDKVersion())
M->setDarwinTargetVariantSDKVersion(*TVSDKVersion);
-
- TLII.reset(
- llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
- Builder.reset(new CodeGen::CodeGenModule(
- Context, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M,
- Diags, *TLII.get(), TM, CoverageInfo));
+ Builder.reset(new CodeGen::CodeGenModule(Context, FS, HeaderSearchOpts,
+ PreprocessorOpts, CodeGenOpts,
+ *M, Diags, CoverageInfo));
for (auto &&Lib : CodeGenOpts.DependentLibraries)
Builder->AddDependentLib(Lib);
@@ -375,9 +366,8 @@ clang::CreateLLVMCodeGen(DiagnosticsEngine &Diags, llvm::StringRef ModuleName,
const HeaderSearchOptions &HeaderSearchOpts,
const PreprocessorOptions &PreprocessorOpts,
const CodeGenOptions &CGO, llvm::LLVMContext &C,
- llvm::TargetMachine *TM,
CoverageSourceInfo *CoverageInfo) {
return new CodeGeneratorImpl(Diags, ModuleName, std::move(FS),
- HeaderSearchOpts, PreprocessorOpts, CGO, C, TM,
+ HeaderSearchOpts, PreprocessorOpts, CGO, C,
CoverageInfo);
}
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
index fcff475c3d683d..3a1f745d9ed777 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
@@ -21,7 +21,6 @@
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/IR/Constants.h"
@@ -57,7 +56,6 @@ class PCHContainerGenerator : public ASTConsumer {
std::unique_ptr<CodeGen::CodeGenModule> Builder;
std::unique_ptr<raw_pwrite_stream> OS;
std::shared_ptr<PCHBuffer> Buffer;
- std::unique_ptr<llvm::TargetLibraryInfoImpl> TLII;
/// Visit every type and emit debug info for it.
struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> {
@@ -179,11 +177,8 @@ class PCHContainerGenerator : public ASTConsumer {
VMContext.reset(new llvm::LLVMContext());
M.reset(new llvm::Module(MainFileName, *VMContext));
M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
- llvm::Triple TargetTriple(M->getTargetTriple());
- TLII.reset(llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
- Builder.reset(new CodeGen::CodeGenModule(*Ctx, FS, HeaderSearchOpts,
- PreprocessorOpts, CodeGenOpts, *M,
- Diags, *TLII.get(), nullptr));
+ Builder.reset(new CodeGen::CodeGenModule(
+ *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags));
// Prepare CGDebugInfo to emit debug info for a clang module.
auto *DI = Builder->getModuleDebugInfo();
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index f1ec1e52272d8a..a9166672088459 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -51,7 +51,6 @@ add_clang_library(clangFrontend
clangAPINotes
clangAST
clangBasic
- clangCodeGen
clangDriver
clangEdit
clangLex
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index d39775009a725e..6242b5a7d9fe39 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -19,7 +19,6 @@
#include "clang/Basic/Stack.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
-#include "clang/CodeGen/BackendUtil.h"
#include "clang/Config/config.h"
#include "clang/Frontend/ChainedDiagnosticConsumer.h"
#include "clang/Frontend/FrontendAction.h"
@@ -44,7 +43,6 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/Errc.h"
@@ -57,7 +55,6 @@
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Host.h"
#include <optional>
#include <time.h>
@@ -157,35 +154,6 @@ bool CompilerInstance::createTarget() {
if (auto *Aux = getAuxTarget())
getTarget().setAuxTarget(Aux);
- llvm::Triple TargetTriple = getTarget().getTriple();
- TargetOptions &TargetOpts = getTargetOpts();
- std::string Error;
- const llvm::Target *TheTarget =
- llvm::TargetRegistry::lookupTarget(TargetTriple.getTriple(), Error);
- if (TheTarget) {
- CodeGenOptions &CodeGenOpts = getCodeGenOpts();
- std::optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
- std::string FeaturesStr =
- llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
- llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
- std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
- llvm::CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel);
- assert(OptLevelOrNone && "Invalid optimization level!");
- llvm::CodeGenOptLevel OptLevel = *OptLevelOrNone;
-
- llvm::TargetOptions Options;
- bool Scc =
- initTargetOptions(getDiagnostics(), Options, CodeGenOpts, TargetOpts,
- getLangOpts(), getHeaderSearchOpts());
- if (Scc) {
- TM.reset(TheTarget->createTargetMachine(TargetTriple.getTriple(),
- TargetOpts.CPU, FeaturesStr,
- Options, RM, CM, OptLevel));
- if (TM)
- TM->setLargeDataThreshold(CodeGenOpts.LargeDataThreshold);
- }
- }
-
return true;
}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
index 1d7db42f6c1f3e..73ffe0694be6d2 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
@@ -11,26 +11,17 @@
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[C_ADDR]], align 4
-// CHECK-NEXT: store volatile i32 [[TMP0]], ptr [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: store volatile i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[ATOMIC_TEMP2]], align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i32
-// CHECK-NEXT: ret i32 [[TMP3]]
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i32 [[TMP0]], i32 [[TMP1]] monotonic monotonic, align 4
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+// CHECK-NEXT: ret i32 [[TMP5]]
//
int test_builtin_ppc_compare_and_swap(int a, int b, int c) {
return __compare_and_swap(&a, &b, c);
@@ -42,26 +33,17 @@ int test_builtin_ppc_compare_and_swap(int a, int b, int c) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store i64 [[B:%.*]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: store i64 [[C:%.*]], ptr [[C_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[C_ADDR]], align 8
-// CHECK-NEXT: store volatile i64 [[TMP0]], ptr [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: store volatile i64 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[ATOMIC_TEMP2]], align 8
-// CHECK-NEXT: store i64 [[TMP2]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i32
-// CHECK-NEXT: ret i32 [[TMP3]]
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile ptr [[A_ADDR]], i64 [[TMP0]], i64 [[TMP1]] monotonic monotonic, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+// CHECK-NEXT: ret i32 [[TMP5]]
//
int test_builtin_ppc_compare_and_swaplp(long a, long b, long c) {
return __compare_and_swaplp(&a, &b, c);
diff --git a/clang/test/CodeGen/PowerPC/quadword-atomics.c b/clang/test/CodeGen/PowerPC/quadword-atomics.c
index 161ca891d32483..dc04423060a03b 100644
--- a/clang/test/CodeGen/PowerPC/quadword-atomics.c
+++ b/clang/test/CodeGen/PowerPC/quadword-atomics.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS
@@ -24,42 +23,62 @@ typedef _Atomic(Q) AtomicQ;
typedef __int128_t int128_t;
+// PPC64-LABEL: @test_load(
+// PPC64: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
+//
Q test_load(AtomicQ *ptr) {
// expected-no-diagnostics
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
}
+// PPC64-LABEL: @test_store(
+// PPC64: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
+//
void test_store(Q val, AtomicQ *ptr) {
// expected-no-diagnostics
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
}
+// PPC64-LABEL: @test_add(
+// PPC64: [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
+//
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
// expected-no-diagnostics
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
}
+// PPC64-LABEL: @test_xchg(
+// PPC64: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
+//
Q test_xchg(AtomicQ *ptr, Q new) {
// expected-no-diagnostics
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
}
+// PPC64-LABEL: @test_cmpxchg(
+// PPC64: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
+//
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
+// PPC64-LABEL: @test_cmpxchg_weak(
+// PPC64: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
+//
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}
+// PPC64-QUADWORD-ATOMICS-LABEL: @is_lock_free(
+// PPC64-QUADWORD-ATOMICS: ret i32 1
+//
+// PPC64-NO-QUADWORD-ATOMICS-LABEL: @is_lock_free(
+// PPC64-NO-QUADWORD-ATOMICS: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
+//
int is_lock_free() {
AtomicQ q;
// expected-no-diagnostics
return __c11_atomic_is_lock_free(sizeof(q));
}
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// PPC64: {{.*}}
-// PPC64-NO-QUADWORD-ATOMICS: {{.*}}
-// PPC64-QUADWORD-ATOMICS: {{.*}}
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
index b74994ffd46084..e3db2063312d2b 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
@@ -82,12 +82,16 @@ __int128 f6() {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 16
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[TMP0]] seq_cst seq_cst, align 16
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 16
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f7() {
return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
@@ -96,13 +100,17 @@ _Bool f7() {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 16
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr @Des, align 16
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 16
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 16
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 16
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8() {
return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
index 1cedcd5edb8a0b..8759df7b19c638 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
@@ -87,12 +87,16 @@ __int128 f6() {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[TMP0]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 8
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f7() {
return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
@@ -101,13 +105,17 @@ _Bool f7() {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @Exp, align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr @Des, align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @Ptr, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i128 [[CMPXCHG_PREV]], ptr @Exp, align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 8
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 8
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8() {
return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
index 2ea902dde70fc9..7c6a82f14197a1 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i16.c
@@ -68,12 +68,16 @@ int16_t f6(int16_t *Ptr, int16_t *Val, int16_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i16, ptr [[EXP:%.*]], align 2
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[CMPXCHG_EXPECTED]], i16 [[DES:%.*]] seq_cst seq_cst, align 2
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i16 [[CMPXCHG_PREV]], ptr [[EXP]], align 2
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[EXP:%.*]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[TMP0]], i16 [[DES:%.*]] seq_cst seq_cst, align 2
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[EXP]], align 2
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP2]]
//
_Bool f7(int16_t *Ptr, int16_t *Exp, int16_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -82,13 +86,17 @@ _Bool f7(int16_t *Ptr, int16_t *Exp, int16_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i16, ptr [[EXP:%.*]], align 2
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i16, ptr [[DES:%.*]], align 2
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[CMPXCHG_EXPECTED]], i16 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 2
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i16 [[CMPXCHG_PREV]], ptr [[EXP]], align 2
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[EXP:%.*]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[DES:%.*]], align 2
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i16 [[TMP0]], i16 [[TMP1]] seq_cst seq_cst, align 2
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i16, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i16, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i16 [[TMP4]], ptr [[EXP]], align 2
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8(int16_t *Ptr, int16_t *Exp, int16_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
index 08bb0479a0da9a..ba630e7c952e5b 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i32.c
@@ -68,12 +68,16 @@ int32_t f6(int32_t *Ptr, int32_t *Val, int32_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[EXP:%.*]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[CMPXCHG_EXPECTED]], i32 [[DES:%.*]] seq_cst seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[EXP]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[EXP:%.*]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[TMP0]], i32 [[DES:%.*]] seq_cst seq_cst, align 4
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[EXP]], align 4
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP2]]
//
_Bool f7(int32_t *Ptr, int32_t *Exp, int32_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -82,13 +86,17 @@ _Bool f7(int32_t *Ptr, int32_t *Exp, int32_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[EXP:%.*]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DES:%.*]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[EXP]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[EXP:%.*]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DES:%.*]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i32 [[TMP0]], i32 [[TMP1]] seq_cst seq_cst, align 4
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[EXP]], align 4
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8(int32_t *Ptr, int32_t *Exp, int32_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
index 743637dd2b7906..25c69ee8c54bf5 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i64.c
@@ -68,12 +68,16 @@ int64_t f6(int64_t *Ptr, int64_t *Val, int64_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[EXP:%.*]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[CMPXCHG_EXPECTED]], i64 [[DES:%.*]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[EXP]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[EXP:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[TMP0]], i64 [[DES:%.*]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[EXP]], align 8
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP2]]
//
_Bool f7(int64_t *Ptr, int64_t *Exp, int64_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -82,13 +86,17 @@ _Bool f7(int64_t *Ptr, int64_t *Exp, int64_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[EXP:%.*]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DES:%.*]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[EXP]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[EXP:%.*]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DES:%.*]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i64 [[TMP0]], i64 [[TMP1]] seq_cst seq_cst, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i64 [[TMP4]], ptr [[EXP]], align 8
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8(int64_t *Ptr, int64_t *Exp, int64_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
index 2114055c1cea7a..1f4b455bc02610 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i8.c
@@ -68,12 +68,16 @@ int8_t f6(int8_t *Ptr, int8_t *Val, int8_t *Ret) {
// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[EXP:%.*]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[CMPXCHG_EXPECTED]], i8 [[DES:%.*]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[EXP]], align 1
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[EXP:%.*]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[TMP0]], i8 [[DES:%.*]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+// CHECK-NEXT: br i1 [[TMP2]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+// CHECK-NEXT: store i8 [[TMP3]], ptr [[EXP]], align 1
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP2]]
//
_Bool f7(int8_t *Ptr, int8_t *Exp, int8_t Des) {
return __atomic_compare_exchange_n(Ptr, Exp, Des, 0,
@@ -82,13 +86,17 @@ _Bool f7(int8_t *Ptr, int8_t *Exp, int8_t Des) {
// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i8, ptr [[EXP:%.*]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[DES:%.*]], align 1
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[CMPXCHG_EXPECTED]], i8 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 1
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i8 [[CMPXCHG_PREV]], ptr [[EXP]], align 1
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i8, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: ret i1 [[CMPXCHG_SUCCESS]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[EXP:%.*]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[DES:%.*]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR:%.*]], i8 [[TMP0]], i8 [[TMP1]] seq_cst seq_cst, align 1
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP2]], 1
+// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[TMP2]], 0
+// CHECK-NEXT: store i8 [[TMP4]], ptr [[EXP]], align 1
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8(int8_t *Ptr, int8_t *Exp, int8_t *Des) {
return __atomic_compare_exchange(Ptr, Exp, Des, 0,
diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index 07237206eb2520..2c3f381f13511e 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// : %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X64 %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X64 %s
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X86 %s
// X64-LABEL: define dso_local x86_fp80 @testinc(
@@ -14,6 +14,7 @@
// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
+//
// X86-LABEL: define dso_local x86_fp80 @testinc(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -43,6 +44,7 @@ long double testinc(_Atomic long double *addr) {
// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP2]]
+//
// X86-LABEL: define dso_local x86_fp80 @testdec(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -96,6 +98,7 @@ long double testdec(_Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: ret x86_fp80 [[TMP10]]
+//
// X86-LABEL: define dso_local x86_fp80 @testcompassign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*]]:
@@ -104,7 +107,6 @@ long double testdec(_Atomic long double *addr) {
// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
-// X86-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
@@ -117,15 +119,13 @@ long double testdec(_Atomic long double *addr) {
// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
// X86-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
-// X86-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 12, ptr [[TMP0]], ptr [[ATOMIC_TEMP1]], ptr [[ATOMIC_TEMP2]], i32 5, i32 5)
-// X86-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
-// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 12, i1 false)
-// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
-// X86-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// X86: [[ATOMIC_CONT]]:
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP4]], i32 noundef 5)
-// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP4]], align 4
+// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
// X86-NEXT: ret x86_fp80 [[TMP5]]
//
long double testcompassign(_Atomic long double *addr) {
@@ -150,6 +150,7 @@ long double testcompassign(_Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
+//
// X86-LABEL: define dso_local x86_fp80 @testassign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -184,6 +185,7 @@ long double testassign(_Atomic long double *addr) {
// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
+//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_inc(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -212,6 +214,7 @@ long double test_volatile_inc(volatile _Atomic long double *addr) {
// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16
// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
// X64-NEXT: ret x86_fp80 [[TMP2]]
+//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_dec(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
@@ -264,6 +267,7 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
// X64-NEXT: ret x86_fp80 [[TMP10]]
+//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_compassign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*]]:
@@ -272,7 +276,6 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
-// X86-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca x86_fp80, align 4
// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
@@ -285,15 +288,13 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
// X86-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
-// X86-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 12, ptr [[TMP0]], ptr [[ATOMIC_TEMP1]], ptr [[ATOMIC_TEMP2]], i32 5, i32 5)
-// X86-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
-// X86-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 12, i1 false)
-// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
-// X86-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
// X86: [[ATOMIC_CONT]]:
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP4]], i32 noundef 5)
-// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP4]], align 4
+// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
// X86-NEXT: ret x86_fp80 [[TMP5]]
//
long double test_volatile_compassign(volatile _Atomic long double *addr) {
@@ -318,6 +319,7 @@ long double test_volatile_compassign(volatile _Atomic long double *addr) {
// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16
// X64-NEXT: ret x86_fp80 [[TMP3]]
+//
// X86-LABEL: define dso_local x86_fp80 @test_volatile_assign(
// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
index 7bb946cc509e44..b6060dcc540f90 100644
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 | FileCheck %s
// REQUIRES: x86-registered-target
@@ -14,99 +13,165 @@
// Basic IRGen tests for __c11_atomic_* and GNU __atomic_*
int fi1(_Atomic(int) *i) {
+ // CHECK-LABEL: @fi1
+ // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return __c11_atomic_load(i, memory_order_seq_cst);
}
int fi1a(int *i) {
+ // CHECK-LABEL: @fi1a
+ // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
int v;
__atomic_load(i, &v, memory_order_seq_cst);
return v;
}
int fi1b(int *i) {
+ // CHECK-LABEL: @fi1b
+ // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return __atomic_load_n(i, memory_order_seq_cst);
}
int fi1c(atomic_int *i) {
+ // CHECK-LABEL: @fi1c
+ // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return atomic_load(i);
}
void fi2(_Atomic(int) *i) {
+ // CHECK-LABEL: @fi2
+ // CHECK: store atomic i32 {{.*}} seq_cst, align 4
__c11_atomic_store(i, 1, memory_order_seq_cst);
}
void fi2a(int *i) {
+ // CHECK-LABEL: @fi2a
+ // CHECK: store atomic i32 {{.*}} seq_cst, align 4
int v = 1;
__atomic_store(i, &v, memory_order_seq_cst);
}
void fi2b(int *i) {
+ // CHECK-LABEL: @fi2b
+ // CHECK: store atomic i32 {{.*}} seq_cst, align 4
__atomic_store_n(i, 1, memory_order_seq_cst);
}
void fi2c(atomic_int *i) {
+ // CHECK-LABEL: @fi2c
+ // CHECK: store atomic i32 {{.*}} seq_cst, align 4
atomic_store(i, 1);
}
int fi3(_Atomic(int) *i) {
+ // CHECK-LABEL: @fi3
+ // CHECK: atomicrmw and {{.*}} seq_cst, align 4
+ // CHECK-NOT: and
return __c11_atomic_fetch_and(i, 1, memory_order_seq_cst);
}
int fi3a(int *i) {
+ // CHECK-LABEL: @fi3a
+ // CHECK: atomicrmw xor {{.*}} seq_cst, align 4
+ // CHECK-NOT: xor
return __atomic_fetch_xor(i, 1, memory_order_seq_cst);
}
int fi3b(int *i) {
+ // CHECK-LABEL: @fi3b
+ // CHECK: atomicrmw add {{.*}} seq_cst, align 4
+ // CHECK: add
return __atomic_add_fetch(i, 1, memory_order_seq_cst);
}
int fi3c(int *i) {
+ // CHECK-LABEL: @fi3c
+ // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
+ // CHECK-NOT: and
return __atomic_fetch_nand(i, 1, memory_order_seq_cst);
}
int fi3d(int *i) {
+ // CHECK-LABEL: @fi3d
+ // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
+ // CHECK: and
+ // CHECK: xor
return __atomic_nand_fetch(i, 1, memory_order_seq_cst);
}
int fi3e(atomic_int *i) {
+ // CHECK-LABEL: @fi3e
+ // CHECK: atomicrmw or {{.*}} seq_cst, align 4
+ // CHECK-NOT: {{ or }}
return atomic_fetch_or(i, 1);
}
int fi3f(int *i) {
+ // CHECK-LABEL: @fi3f
+ // CHECK-NOT: store volatile
+ // CHECK: atomicrmw or {{.*}} seq_cst, align 4
+ // CHECK-NOT: {{ or }}
return __atomic_fetch_or(i, (short)1, memory_order_seq_cst);
}
_Bool fi4(_Atomic(int) *i) {
+ // CHECK-LABEL: @fi4(
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
+ // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+ // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+ // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+ // CHECK: store i32 [[OLD]]
int cmp = 0;
return __c11_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire);
}
_Bool fi4a(int *i) {
+ // CHECK-LABEL: @fi4a
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
+ // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+ // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+ // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+ // CHECK: store i32 [[OLD]]
int cmp = 0;
int desired = 1;
return __atomic_compare_exchange(i, &cmp, &desired, 0, memory_order_acquire, memory_order_acquire);
}
_Bool fi4b(int *i) {
+ // CHECK-LABEL: @fi4b(
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg weak ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
+ // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+ // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+ // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+ // CHECK: store i32 [[OLD]]
int cmp = 0;
return __atomic_compare_exchange_n(i, &cmp, 1, 1, memory_order_acquire, memory_order_acquire);
}
_Bool fi4c(atomic_int *i) {
+ // CHECK-LABEL: @fi4c
+ // CHECK: cmpxchg ptr {{.*}} seq_cst seq_cst, align 4
int cmp = 0;
return atomic_compare_exchange_strong(i, &cmp, 1);
}
#define _AS1 __attribute__((address_space(1)))
_Bool fi4d(_Atomic(int) *i, int _AS1 *ptr2) {
+ // CHECK-LABEL: @fi4d(
+ // CHECK: [[EXPECTED:%[.0-9A-Z_a-z]+]] = load i32, ptr addrspace(1) %{{[0-9]+}}
+ // CHECK: cmpxchg ptr %{{[0-9]+}}, i32 [[EXPECTED]], i32 %{{[0-9]+}} acquire acquire, align 4
return __c11_atomic_compare_exchange_strong(i, ptr2, 1, memory_order_acquire, memory_order_acquire);
}
float ff1(_Atomic(float) *d) {
+ // CHECK-LABEL: @ff1
+ // CHECK: load atomic i32, ptr {{.*}} monotonic, align 4
return __c11_atomic_load(d, memory_order_relaxed);
}
void ff2(_Atomic(float) *d) {
+ // CHECK-LABEL: @ff2
+ // CHECK: store atomic i32 {{.*}} release, align 4
__c11_atomic_store(d, 1, memory_order_release);
}
@@ -119,70 +184,135 @@ struct S {
};
void implicit_store(_Atomic(struct S) *a, struct S s) {
+ // CHECK-LABEL: @implicit_store(
+ // CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} seq_cst, align 8
*a = s;
}
struct S implicit_load(_Atomic(struct S) *a) {
+ // CHECK-LABEL: @implicit_load(
+ // CHECK: load atomic i64, ptr %{{.*}} seq_cst, align 8
return *a;
}
struct S fd1(struct S *a) {
+ // CHECK-LABEL: @fd1
+ // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
+ // CHECK: [[TMP1:%.*]] = load atomic i64, ptr {{%.*}} seq_cst, align 4
+ // CHECK-NEXT: store i64 [[TMP1]], ptr [[RETVAL]], align 4
+ // CHECK: ret
struct S ret;
__atomic_load(a, &ret, memory_order_seq_cst);
return ret;
}
void fd2(struct S *a, struct S *b) {
+ // CHECK-LABEL: @fd2
+ // CHECK: [[A_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: store ptr %a, ptr [[A_ADDR]], align 4
+ // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+ // CHECK-NEXT: store atomic i64 [[LOAD_B]], ptr [[LOAD_A_PTR]] seq_cst, align 4
+ // CHECK-NEXT: ret void
__atomic_store(a, b, memory_order_seq_cst);
}
void fd3(struct S *a, struct S *b, struct S *c) {
+ // CHECK-LABEL: @fd3
+ // CHECK: [[A_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: store ptr %a, ptr [[A_ADDR]], align 4
+ // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
+ // CHECK-NEXT: store ptr %c, ptr [[C_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+ // CHECK-NEXT: [[RESULT:%.*]] = atomicrmw xchg ptr [[LOAD_A_PTR]], i64 [[LOAD_B]] seq_cst, align 4
+ // CHECK-NEXT: store i64 [[RESULT]], ptr [[LOAD_C_PTR]], align 4
__atomic_exchange(a, b, c, memory_order_seq_cst);
}
_Bool fd4(struct S *a, struct S *b, struct S *c) {
+ // CHECK-LABEL: @fd4
+ // CHECK: [[A_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
+ // CHECK: store ptr %a, ptr [[A_ADDR]], align 4
+ // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
+ // CHECK-NEXT: store ptr %c, ptr [[C_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+ // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+ // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, ptr [[LOAD_C_PTR]], align 4
+ // CHECK-NEXT: {{.*}} = cmpxchg weak ptr [[LOAD_A_PTR]], i64 [[LOAD_B]], i64 [[LOAD_C]] seq_cst seq_cst, align 4
return __atomic_compare_exchange(a, b, c, 1, 5, 5);
}
int* fp1(_Atomic(int*) *p) {
+ // CHECK-LABEL: @fp1
+ // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
return __c11_atomic_load(p, memory_order_seq_cst);
}
int* fp2(_Atomic(int*) *p) {
+ // CHECK-LABEL: @fp2
+ // CHECK: store i32 4
+ // CHECK: atomicrmw add {{.*}} monotonic, align 4
return __c11_atomic_fetch_add(p, 1, memory_order_relaxed);
}
int *fp2a(int **p) {
+ // CHECK-LABEL: @fp2a
+ // CHECK: store i32 4
+ // CHECK: atomicrmw sub {{.*}} monotonic, align 4
// Note, the GNU builtins do not multiply by sizeof(T)!
return __atomic_fetch_sub(p, 4, memory_order_relaxed);
}
_Complex float fc(_Atomic(_Complex float) *c) {
+ // CHECK-LABEL: @fc
+ // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 8
return __c11_atomic_exchange(c, 2, memory_order_seq_cst);
}
typedef struct X { int x; } X;
X fs(_Atomic(X) *c) {
+ // CHECK-LABEL: @fs
+ // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 4
return __c11_atomic_exchange(c, (X){2}, memory_order_seq_cst);
}
X fsa(X *c, X *d) {
+ // CHECK-LABEL: @fsa
+ // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 4
X ret;
__atomic_exchange(c, d, &ret, memory_order_seq_cst);
return ret;
}
_Bool fsb(_Bool *c) {
+ // CHECK-LABEL: @fsb
+ // CHECK: atomicrmw xchg ptr {{.*}} seq_cst, align 1
return __atomic_exchange_n(c, 1, memory_order_seq_cst);
}
char flag1;
volatile char flag2;
void test_and_set(void) {
+ // CHECK: atomicrmw xchg ptr @flag1, i8 1 seq_cst, align 1
__atomic_test_and_set(&flag1, memory_order_seq_cst);
+ // CHECK: atomicrmw volatile xchg ptr @flag2, i8 1 acquire, align 1
__atomic_test_and_set(&flag2, memory_order_acquire);
+ // CHECK: store atomic volatile i8 0, ptr @flag2 release, align 1
__atomic_clear(&flag2, memory_order_release);
+ // CHECK: store atomic i8 0, ptr @flag1 seq_cst, align 1
__atomic_clear(&flag1, memory_order_seq_cst);
}
@@ -196,18 +326,25 @@ struct Seventeen {
struct Incomplete;
int lock_free(struct Incomplete *incomplete) {
+ // CHECK-LABEL: @lock_free
+ // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 3, ptr noundef null)
__c11_atomic_is_lock_free(3);
+ // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 16, ptr noundef {{.*}}@sixteen{{.*}})
__atomic_is_lock_free(16, &sixteen);
+ // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 17, ptr noundef {{.*}}@seventeen{{.*}})
__atomic_is_lock_free(17, &seventeen);
+ // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 4, {{.*}})
__atomic_is_lock_free(4, incomplete);
char cs[20];
+ // CHECK: call zeroext i1 @__atomic_is_lock_free(i32 noundef 4, {{.*}})
__atomic_is_lock_free(4, cs+1);
+ // CHECK-NOT: call
__atomic_always_lock_free(3, 0);
__atomic_always_lock_free(16, 0);
__atomic_always_lock_free(17, 0);
@@ -217,6 +354,7 @@ int lock_free(struct Incomplete *incomplete) {
int n;
__atomic_is_lock_free(4, &n);
+ // CHECK: ret i32 1
return __c11_atomic_is_lock_free(sizeof(_Atomic(int)));
}
@@ -236,92 +374,229 @@ struct foo bigThing;
_Atomic(struct foo) bigAtomic;
void structAtomicStore(void) {
+ // CHECK-LABEL: @structAtomicStore
struct foo f = {0};
struct bar b = {0};
__atomic_store(&smallThing, &b, 5);
+ // CHECK: call void @__atomic_store(i32 noundef 3, ptr noundef @smallThing
__atomic_store(&bigThing, &f, 5);
+ // CHECK: call void @__atomic_store(i32 noundef 512, ptr noundef @bigThing
}
void structAtomicLoad(void) {
+ // CHECK-LABEL: @structAtomicLoad
struct bar b;
__atomic_load(&smallThing, &b, 5);
+ // CHECK: call void @__atomic_load(i32 noundef 3, ptr noundef @smallThing
struct foo f = {0};
__atomic_load(&bigThing, &f, 5);
+ // CHECK: call void @__atomic_load(i32 noundef 512, ptr noundef @bigThing
}
struct foo structAtomicExchange(void) {
+ // CHECK-LABEL: @structAtomicExchange
struct foo f = {0};
struct foo old;
__atomic_exchange(&f, &bigThing, &old, 5);
+ // CHECK: call void @__atomic_exchange(i32 noundef 512, {{.*}}, ptr noundef @bigThing,
return __c11_atomic_exchange(&bigAtomic, f, 5);
+ // CHECK: call void @__atomic_exchange(i32 noundef 512, ptr noundef @bigAtomic,
}
int structAtomicCmpExchange(void) {
+ // CHECK-LABEL: @structAtomicCmpExchange
+ // CHECK: %[[x_mem:.*]] = alloca i8
_Bool x = __atomic_compare_exchange(&smallThing, &thing1, &thing2, 1, 5, 5);
+ // CHECK: %[[call1:.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 3, {{.*}} @smallThing{{.*}} @thing1{{.*}} @thing2
+ // CHECK: %[[zext1:.*]] = zext i1 %[[call1]] to i8
+ // CHECK: store i8 %[[zext1]], ptr %[[x_mem]], align 1
+ // CHECK: %[[x:.*]] = load i8, ptr %[[x_mem]]
+ // CHECK: %[[x_bool:.*]] = trunc i8 %[[x]] to i1
+ // CHECK: %[[conv1:.*]] = zext i1 %[[x_bool]] to i32
struct foo f = {0};
struct foo g = {0};
g.big[12] = 12;
return x & __c11_atomic_compare_exchange_strong(&bigAtomic, &f, g, 5, 5);
+ // CHECK: %[[call2:.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 512, ptr noundef @bigAtomic,
+ // CHECK: %[[conv2:.*]] = zext i1 %[[call2]] to i32
+ // CHECK: %[[and:.*]] = and i32 %[[conv1]], %[[conv2]]
+ // CHECK: ret i32 %[[and]]
}
// Check that no atomic operations are used in any initialisation of _Atomic
// types.
_Atomic(int) atomic_init_i = 42;
+// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo(void)
{
+ // CHECK-NOT: }
+ // CHECK-NOT: atomic
+ // CHECK: store
_Atomic(int) j = 12;
+ // CHECK-NOT: }
+ // CHECK-NOT: atomic
+ // CHECK: store
__c11_atomic_init(&j, 42);
+ // CHECK-NOT: atomic
+ // CHECK: }
}
+// CHECK-LABEL: @failureOrder
void failureOrder(_Atomic(int) *ptr, int *ptr2) {
__c11_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed);
+ // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acquire monotonic, align 4
__c11_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire);
+ // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst acquire, align 4
// Unknown ordering: conservatively pick strongest valid option (for now!).
__atomic_compare_exchange(ptr2, ptr2, ptr2, 0, memory_order_acq_rel, *ptr2);
+ // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acq_rel acquire, align 4
// Undefined behaviour: don't really care what that last ordering is so leave
// it out:
__atomic_compare_exchange_n(ptr2, ptr2, 43, 1, memory_order_seq_cst, 42);
+ // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst {{.*}}, align 4
}
+// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(_Atomic(int) *ptr, int *ptr2, int success, int fail) {
__c11_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+ // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+ // CHECK: [[MONOTONIC]]
+ // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[ACQUIRE]]
+ // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[RELEASE]]
+ // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[ACQREL]]
+ // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[SEQCST]]
+ // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
+ // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[MONOTONIC_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} monotonic monotonic, align
+ // CHECK: br
+
+ // CHECK: [[MONOTONIC_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} monotonic acquire, align
+ // CHECK: br
+
+ // CHECK: [[MONOTONIC_SEQCST]]
+ // CHECK: cmpxchg {{.*}} monotonic seq_cst, align
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} acquire monotonic, align
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} acquire acquire, align
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_SEQCST]]
+ // CHECK: cmpxchg {{.*}} acquire seq_cst, align
+ // CHECK: br
+
+ // CHECK: [[RELEASE_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} release monotonic, align
+ // CHECK: br
+
+ // CHECK: [[RELEASE_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} release acquire, align
+ // CHECK: br
+
+ // CHECK: [[RELEASE_SEQCST]]
+ // CHECK: cmpxchg {{.*}} release seq_cst, align
+ // CHECK: br
+
+ // CHECK: [[ACQREL_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} acq_rel monotonic, align
+ // CHECK: br
+
+ // CHECK: [[ACQREL_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} acq_rel acquire, align
+ // CHECK: br
+
+ // CHECK: [[ACQREL_SEQCST]]
+ // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align
+ // CHECK: br
+
+ // CHECK: [[SEQCST_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} seq_cst monotonic, align
+ // CHECK: br
+
+ // CHECK: [[SEQCST_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} seq_cst acquire, align
+ // CHECK: br
+
+ // CHECK: [[SEQCST_SEQCST]]
+ // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align
+ // CHECK: br
}
void generalWeakness(int *ptr, int *ptr2, _Bool weak) {
__atomic_compare_exchange_n(ptr, ptr2, 42, weak, memory_order_seq_cst, memory_order_seq_cst);
+ // CHECK: switch i1 {{.*}}, label %[[WEAK:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i1 false, label %[[STRONG:[0-9a-zA-Z._]+]]
+ // CHECK: [[STRONG]]
+ // CHECK-NOT: br
+ // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align
+ // CHECK: br
+ // CHECK: [[WEAK]]
+ // CHECK-NOT: br
+ // CHECK: cmpxchg weak {{.*}} seq_cst seq_cst, align
+ // CHECK: br
__atomic_compare_exchange_n(ptr, ptr2, 42, weak, memory_order_release, memory_order_acquire);
+ // CHECK: switch i1 {{.*}}, label %[[WEAK:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i1 false, label %[[STRONG:[0-9a-zA-Z._]+]]
+ // CHECK: [[STRONG]]
+ // CHECK-NOT: br
+ // CHECK: cmpxchg {{.*}} release acquire
+ // CHECK: br
+ // CHECK: [[WEAK]]
+ // CHECK-NOT: br
+ // CHECK: cmpxchg weak {{.*}} release acquire
+ // CHECK: br
}
// Having checked the flow in the previous two cases, we'll trust clang to
@@ -329,74 +604,185 @@ void generalWeakness(int *ptr, int *ptr2, _Bool weak) {
void EMIT_ALL_THE_THINGS(int *ptr, int *ptr2, int new, _Bool weak, int success, int fail) {
__atomic_compare_exchange(ptr, ptr2, &new, weak, success, fail);
+ // CHECK: = cmpxchg {{.*}} monotonic monotonic, align
+ // CHECK: = cmpxchg {{.*}} monotonic acquire, align
+ // CHECK: = cmpxchg {{.*}} monotonic seq_cst, align
+ // CHECK: = cmpxchg weak {{.*}} monotonic monotonic, align
+ // CHECK: = cmpxchg weak {{.*}} monotonic acquire, align
+ // CHECK: = cmpxchg weak {{.*}} monotonic seq_cst, align
+ // CHECK: = cmpxchg {{.*}} acquire monotonic, align
+ // CHECK: = cmpxchg {{.*}} acquire acquire, align
+ // CHECK: = cmpxchg {{.*}} acquire seq_cst, align
+ // CHECK: = cmpxchg weak {{.*}} acquire monotonic, align
+ // CHECK: = cmpxchg weak {{.*}} acquire acquire, align
+ // CHECK: = cmpxchg weak {{.*}} acquire seq_cst, align
+ // CHECK: = cmpxchg {{.*}} release monotonic, align
+ // CHECK: = cmpxchg {{.*}} release acquire, align
+ // CHECK: = cmpxchg {{.*}} release seq_cst, align
+ // CHECK: = cmpxchg weak {{.*}} release monotonic, align
+ // CHECK: = cmpxchg weak {{.*}} release acquire, align
+ // CHECK: = cmpxchg weak {{.*}} release seq_cst, align
+ // CHECK: = cmpxchg {{.*}} acq_rel monotonic, align
+ // CHECK: = cmpxchg {{.*}} acq_rel acquire, align
+ // CHECK: = cmpxchg {{.*}} acq_rel seq_cst, align
+ // CHECK: = cmpxchg weak {{.*}} acq_rel monotonic, align
+ // CHECK: = cmpxchg weak {{.*}} acq_rel acquire, align
+ // CHECK: = cmpxchg weak {{.*}} acq_rel seq_cst, align
+ // CHECK: = cmpxchg {{.*}} seq_cst monotonic, align
+ // CHECK: = cmpxchg {{.*}} seq_cst acquire, align
+ // CHECK: = cmpxchg {{.*}} seq_cst seq_cst, align
+ // CHECK: = cmpxchg weak {{.*}} seq_cst monotonic, align
+ // CHECK: = cmpxchg weak {{.*}} seq_cst acquire, align
+ // CHECK: = cmpxchg weak {{.*}} seq_cst seq_cst, align
}
int PR21643(void) {
return __atomic_or_fetch((int __attribute__((address_space(257))) *)0x308, 1,
__ATOMIC_RELAXED);
+ // CHECK: %[[atomictmp:.*]] = alloca i32, align 4
+ // CHECK: %[[atomicdst:.*]] = alloca i32, align 4
+ // CHECK: store i32 1, ptr %[[atomictmp]]
+ // CHECK: %[[one:.*]] = load i32, ptr %[[atomictmp]], align 4
+ // CHECK: %[[old:.*]] = atomicrmw or ptr addrspace(257) inttoptr (i32 776 to ptr addrspace(257)), i32 %[[one]] monotonic, align 4
+ // CHECK: %[[new:.*]] = or i32 %[[old]], %[[one]]
+ // CHECK: store i32 %[[new]], ptr %[[atomicdst]], align 4
+ // CHECK: %[[ret:.*]] = load i32, ptr %[[atomicdst]], align 4
+ // CHECK: ret i32 %[[ret]]
}
int PR17306_1(volatile _Atomic(int) *i) {
+ // CHECK-LABEL: @PR17306_1
+ // CHECK: %[[i_addr:.*]] = alloca ptr
+ // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+ // CHECK-NEXT: store ptr %i, ptr %[[i_addr]]
+ // CHECK-NEXT: %[[addr:.*]] = load ptr, ptr %[[i_addr]]
+ // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] seq_cst, align 4
+ // CHECK-NEXT: store i32 %[[res]], ptr %[[atomicdst]]
+ // CHECK-NEXT: %[[retval:.*]] = load i32, ptr %[[atomicdst]]
+ // CHECK-NEXT: ret i32 %[[retval]]
return __c11_atomic_load(i, memory_order_seq_cst);
}
int PR17306_2(volatile int *i, int value) {
+ // CHECK-LABEL: @PR17306_2
+ // CHECK: %[[i_addr:.*]] = alloca ptr
+ // CHECK-NEXT: %[[value_addr:.*]] = alloca i32
+ // CHECK-NEXT: %[[atomictmp:.*]] = alloca i32
+ // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+ // CHECK-NEXT: store ptr %i, ptr %[[i_addr]]
+ // CHECK-NEXT: store i32 %value, ptr %[[value_addr]]
+ // CHECK-NEXT: %[[i_lval:.*]] = load ptr, ptr %[[i_addr]]
+ // CHECK-NEXT: %[[value:.*]] = load i32, ptr %[[value_addr]]
+ // CHECK-NEXT: store i32 %[[value]], ptr %[[atomictmp]]
+ // CHECK-NEXT: %[[value_lval:.*]] = load i32, ptr %[[atomictmp]]
+ // CHECK-NEXT: %[[old_val:.*]] = atomicrmw volatile add ptr %[[i_lval]], i32 %[[value_lval]] seq_cst, align 4
+ // CHECK-NEXT: %[[new_val:.*]] = add i32 %[[old_val]], %[[value_lval]]
+ // CHECK-NEXT: store i32 %[[new_val]], ptr %[[atomicdst]]
+ // CHECK-NEXT: %[[retval:.*]] = load i32, ptr %[[atomicdst]]
+ // CHECK-NEXT: ret i32 %[[retval]]
return __atomic_add_fetch(i, value, memory_order_seq_cst);
}
void test_underaligned(void) {
+ // CHECK-LABEL: @test_underaligned
struct Underaligned { char c[8]; } underaligned_a, underaligned_b, underaligned_c;
+ // CHECK: load atomic i64, {{.*}}, align 1
__atomic_load(&underaligned_a, &underaligned_b, memory_order_seq_cst);
+ // CHECK: store atomic i64 {{.*}}, align 1
__atomic_store(&underaligned_a, &underaligned_b, memory_order_seq_cst);
+ // CHECK: atomicrmw xchg ptr {{.*}}, align 1
__atomic_exchange(&underaligned_a, &underaligned_b, &underaligned_c, memory_order_seq_cst);
+ // CHECK: cmpxchg weak ptr {{.*}}, align 1
__atomic_compare_exchange(&underaligned_a, &underaligned_b, &underaligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
__attribute__((aligned)) struct Underaligned aligned_a, aligned_b, aligned_c;
+ // CHECK: load atomic i64, {{.*}}, align 16
__atomic_load(&aligned_a, &aligned_b, memory_order_seq_cst);
+ // CHECK: store atomic i64 {{.*}}, align 16
__atomic_store(&aligned_a, &aligned_b, memory_order_seq_cst);
+ // CHECK: atomicrmw xchg ptr {{.*}}, align 16
__atomic_exchange(&aligned_a, &aligned_b, &aligned_c, memory_order_seq_cst);
+ // CHECK: cmpxchg weak ptr {{.*}}, align 16
__atomic_compare_exchange(&aligned_a, &aligned_b, &aligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
}
void test_c11_minmax(_Atomic(int) * si, _Atomic(unsigned) * ui, _Atomic(short) * ss, _Atomic(unsigned char) * uc, _Atomic(long long) * sll) {
+ // CHECK-LABEL: @test_c11_minmax
+ // CHECK: atomicrmw max ptr {{.*}} acquire, align 4
*si = __c11_atomic_fetch_max(si, 42, memory_order_acquire);
+ // CHECK: atomicrmw min ptr {{.*}} acquire, align 4
*si = __c11_atomic_fetch_min(si, 42, memory_order_acquire);
+ // CHECK: atomicrmw umax ptr {{.*}} acquire, align 4
*ui = __c11_atomic_fetch_max(ui, 42, memory_order_acquire);
+ // CHECK: atomicrmw umin ptr {{.*}} acquire, align 4
*ui = __c11_atomic_fetch_min(ui, 42, memory_order_acquire);
+ // CHECK: atomicrmw max ptr {{.*}} acquire, align 2
*ss = __c11_atomic_fetch_max(ss, 42, memory_order_acquire);
+ // CHECK: atomicrmw min ptr {{.*}} acquire, align 2
*ss = __c11_atomic_fetch_min(ss, 42, memory_order_acquire);
+ // CHECK: atomicrmw umax ptr {{.*}} acquire, align 1
*uc = __c11_atomic_fetch_max(uc, 42, memory_order_acquire);
+ // CHECK: atomicrmw umin ptr {{.*}} acquire, align 1
*uc = __c11_atomic_fetch_min(uc, 42, memory_order_acquire);
+ // CHECK: atomicrmw max ptr {{.*}} acquire, align 8
*sll = __c11_atomic_fetch_max(sll, 42, memory_order_acquire);
+ // CHECK: atomicrmw min ptr {{.*}} acquire, align 8
*sll = __c11_atomic_fetch_min(sll, 42, memory_order_acquire);
}
void test_minmax_postop(int *si, unsigned *ui, unsigned short *us, signed char *sc, unsigned long long *ull) {
int val = 42;
+ // CHECK-LABEL: @test_minmax_postop
+ // CHECK: [[OLD:%.*]] = atomicrmw max ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
+ // CHECK: [[TST:%.*]] = icmp sgt i32 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
+ // CHECK: store i32 [[NEW]], ptr
*si = __atomic_max_fetch(si, 42, memory_order_release);
+ // CHECK: [[OLD:%.*]] = atomicrmw min ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
+ // CHECK: [[TST:%.*]] = icmp slt i32 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
+ // CHECK: store i32 [[NEW]], ptr
*si = __atomic_min_fetch(si, 42, memory_order_release);
+ // CHECK: [[OLD:%.*]] = atomicrmw umax ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
+ // CHECK: [[TST:%.*]] = icmp ugt i32 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
+ // CHECK: store i32 [[NEW]], ptr
*ui = __atomic_max_fetch(ui, 42, memory_order_release);
+ // CHECK: [[OLD:%.*]] = atomicrmw umin ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
+ // CHECK: [[TST:%.*]] = icmp ult i32 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
+ // CHECK: store i32 [[NEW]], ptr
*ui = __atomic_min_fetch(ui, 42, memory_order_release);
+ // CHECK: [[OLD:%.*]] = atomicrmw umin ptr [[PTR:%.*]], i16 [[RHS:%.*]] release, align 2
+ // CHECK: [[TST:%.*]] = icmp ult i16 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i16 [[OLD]], i16 [[RHS]]
+ // CHECK: store i16 [[NEW]], ptr
*us = __atomic_min_fetch(us, 42, memory_order_release);
+ // CHECK: [[OLD:%.*]] = atomicrmw min ptr [[PTR:%.*]], i8 [[RHS:%.*]] release, align 1
+ // CHECK: [[TST:%.*]] = icmp slt i8 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i8 [[OLD]], i8 [[RHS]]
+ // CHECK: store i8 [[NEW]], ptr
*sc = __atomic_min_fetch(sc, 42, memory_order_release);
+ // CHECK: [[OLD:%.*]] = atomicrmw umin ptr {{%.*}}, i64 [[RHS:%.*]] release, align 4
+ // CHECK: [[TST:%.*]] = icmp ult i64 [[OLD]], [[RHS]]
+ // CHECK: [[NEW:%.*]] = select i1 [[TST]], i64 [[OLD]], i64 [[RHS]]
+ // CHECK: store i64 [[NEW]], ptr
*ull = __atomic_min_fetch(ull, 42, memory_order_release);
}
#endif
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/atomic_ops.c b/clang/test/CodeGen/atomic_ops.c
index 7f41fecdb6617d..fb34fc2a43836d 100644
--- a/clang/test/CodeGen/atomic_ops.c
+++ b/clang/test/CodeGen/atomic_ops.c
@@ -1,364 +1,116 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple x86_64 -emit-llvm %s \
// RUN: -o - | FileCheck -check-prefixes=CHECK,NATIVE %s
// RUN: %clang_cc1 -triple riscv32 -target-feature -a -emit-llvm %s \
// RUN: -o - | FileCheck -check-prefixes=CHECK,LIBCALL %s
-// NATIVE-LABEL: define dso_local void @foo(
-// NATIVE-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-// NATIVE-NEXT: [[ENTRY:.*]]:
-// NATIVE-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[I:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[J:%.*]] = alloca i16, align 2
-// NATIVE-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP7:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP19:%.*]] = alloca i16, align 2
-// NATIVE-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i16, align 2
-// NATIVE-NEXT: [[ATOMIC_TEMP21:%.*]] = alloca i16, align 2
-// NATIVE-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
-// NATIVE-NEXT: store i32 0, ptr [[I]], align 4
-// NATIVE-NEXT: store i16 0, ptr [[J]], align 2
-// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[I]] seq_cst, align 4
-// NATIVE-NEXT: br label %[[ATOMIC_OP:.*]]
-// NATIVE: [[ATOMIC_OP]]:
-// NATIVE-NEXT: [[TMP0:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[ENTRY]] ], [ [[TMP1:%.*]], %[[ATOMIC_OP]] ]
-// NATIVE-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 2
-// NATIVE-NEXT: store i32 [[TMP0]], ptr [[ATOMIC_TEMP]], align 4
-// NATIVE-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP1]], align 4
-// NATIVE-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// NATIVE-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
-// NATIVE-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
-// NATIVE-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// NATIVE-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// NATIVE-NEXT: [[TMP1]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
-// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// NATIVE: [[ATOMIC_CONT]]:
-// NATIVE-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic i32, ptr [[I]] seq_cst, align 4
-// NATIVE-NEXT: br label %[[ATOMIC_OP3:.*]]
-// NATIVE: [[ATOMIC_OP3]]:
-// NATIVE-NEXT: [[TMP2:%.*]] = phi i32 [ [[ATOMIC_LOAD4]], %[[ATOMIC_CONT]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP3]] ]
-// NATIVE-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP2]], 2
-// NATIVE-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP6]], align 4
-// NATIVE-NEXT: store i32 [[DIV]], ptr [[ATOMIC_TEMP7]], align 4
-// NATIVE-NEXT: [[CMPXCHG_EXPECTED9:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4
-// NATIVE-NEXT: [[CMPXCHG_DESIRED10:%.*]] = load i32, ptr [[ATOMIC_TEMP7]], align 4
-// NATIVE-NEXT: [[CMPXCHG_PAIR11:%.*]] = cmpxchg ptr [[I]], i32 [[CMPXCHG_EXPECTED9]], i32 [[CMPXCHG_DESIRED10]] seq_cst seq_cst, align 4
-// NATIVE-NEXT: [[CMPXCHG_PREV12:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 0
-// NATIVE-NEXT: store i32 [[CMPXCHG_PREV12]], ptr [[ATOMIC_TEMP8]], align 4
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS13:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR11]], 1
-// NATIVE-NEXT: [[TMP3]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
-// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS13]], label %[[ATOMIC_CONT5:.*]], label %[[ATOMIC_OP3]]
-// NATIVE: [[ATOMIC_CONT5]]:
-// NATIVE-NEXT: [[TMP4:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// NATIVE-NEXT: [[ATOMIC_LOAD15:%.*]] = load atomic i16, ptr [[J]] seq_cst, align 2
-// NATIVE-NEXT: br label %[[ATOMIC_OP14:.*]]
-// NATIVE: [[ATOMIC_OP14]]:
-// NATIVE-NEXT: [[TMP5:%.*]] = phi i16 [ [[ATOMIC_LOAD15]], %[[ATOMIC_CONT5]] ], [ [[TMP6:%.*]], %[[ATOMIC_OP14]] ]
-// NATIVE-NEXT: [[CONV:%.*]] = zext i16 [[TMP5]] to i32
-// NATIVE-NEXT: [[DIV16:%.*]] = sdiv i32 [[CONV]], [[TMP4]]
-// NATIVE-NEXT: [[CONV17:%.*]] = trunc i32 [[DIV16]] to i16
-// NATIVE-NEXT: store i16 [[TMP5]], ptr [[ATOMIC_TEMP19]], align 2
-// NATIVE-NEXT: store i16 [[CONV17]], ptr [[ATOMIC_TEMP20]], align 2
-// NATIVE-NEXT: [[CMPXCHG_EXPECTED22:%.*]] = load i16, ptr [[ATOMIC_TEMP19]], align 2
-// NATIVE-NEXT: [[CMPXCHG_DESIRED23:%.*]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
-// NATIVE-NEXT: [[CMPXCHG_PAIR24:%.*]] = cmpxchg ptr [[J]], i16 [[CMPXCHG_EXPECTED22]], i16 [[CMPXCHG_DESIRED23]] seq_cst seq_cst, align 2
-// NATIVE-NEXT: [[CMPXCHG_PREV25:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 0
-// NATIVE-NEXT: store i16 [[CMPXCHG_PREV25]], ptr [[ATOMIC_TEMP21]], align 2
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS26:%.*]] = extractvalue { i16, i1 } [[CMPXCHG_PAIR24]], 1
-// NATIVE-NEXT: [[TMP6]] = load i16, ptr [[ATOMIC_TEMP21]], align 2
-// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS26]], label %[[ATOMIC_CONT18:.*]], label %[[ATOMIC_OP14]]
-// NATIVE: [[ATOMIC_CONT18]]:
-// NATIVE-NEXT: ret void
-//
-// LIBCALL-LABEL: define dso_local void @foo(
-// LIBCALL-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-// LIBCALL-NEXT: [[ENTRY:.*]]:
-// LIBCALL-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[I:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[J:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP7:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP9:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: [[ATOMIC_TEMP19:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i16, align 2
-// LIBCALL-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
-// LIBCALL-NEXT: store i32 0, ptr [[I]], align 4
-// LIBCALL-NEXT: store i16 0, ptr [[J]], align 2
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[I]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// LIBCALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// LIBCALL-NEXT: br label %[[ATOMIC_OP:.*]]
-// LIBCALL: [[ATOMIC_OP]]:
-// LIBCALL-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[ATOMIC_OP]] ]
-// LIBCALL-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 2
-// LIBCALL-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
-// LIBCALL-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[I]], ptr [[ATOMIC_TEMP1]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
-// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 4, i1 false)
-// LIBCALL-NEXT: [[TMP2]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
-// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// LIBCALL: [[ATOMIC_CONT]]:
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[I]], ptr noundef [[ATOMIC_TEMP5]], i32 noundef 5)
-// LIBCALL-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP5]], align 4
-// LIBCALL-NEXT: br label %[[ATOMIC_OP4:.*]]
-// LIBCALL: [[ATOMIC_OP4]]:
-// LIBCALL-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP3]], %[[ATOMIC_CONT]] ], [ [[TMP5:%.*]], %[[ATOMIC_OP4]] ]
-// LIBCALL-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2
-// LIBCALL-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP7]], align 4
-// LIBCALL-NEXT: store i32 [[DIV]], ptr [[ATOMIC_TEMP8]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_DESIRED10:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4
-// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_411:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[I]], ptr [[ATOMIC_TEMP7]], i32 [[CMPXCHG_DESIRED10]], i32 5, i32 5)
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS12:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_411]], 0
-// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP9]], ptr [[ATOMIC_TEMP7]], i64 4, i1 false)
-// LIBCALL-NEXT: [[TMP5]] = load i32, ptr [[ATOMIC_TEMP9]], align 4
-// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS12]], label %[[ATOMIC_CONT6:.*]], label %[[ATOMIC_OP4]]
-// LIBCALL: [[ATOMIC_CONT6]]:
-// LIBCALL-NEXT: [[TMP6:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 2, ptr noundef [[J]], ptr noundef [[ATOMIC_TEMP14]], i32 noundef 5)
-// LIBCALL-NEXT: [[TMP7:%.*]] = load i16, ptr [[ATOMIC_TEMP14]], align 2
-// LIBCALL-NEXT: br label %[[ATOMIC_OP13:.*]]
-// LIBCALL: [[ATOMIC_OP13]]:
-// LIBCALL-NEXT: [[TMP8:%.*]] = phi i16 [ [[TMP7]], %[[ATOMIC_CONT6]] ], [ [[TMP9:%.*]], %[[ATOMIC_OP13]] ]
-// LIBCALL-NEXT: [[CONV:%.*]] = zext i16 [[TMP8]] to i32
-// LIBCALL-NEXT: [[DIV15:%.*]] = sdiv i32 [[CONV]], [[TMP6]]
-// LIBCALL-NEXT: [[CONV16:%.*]] = trunc i32 [[DIV15]] to i16
-// LIBCALL-NEXT: store i16 [[TMP8]], ptr [[ATOMIC_TEMP18]], align 2
-// LIBCALL-NEXT: store i16 [[CONV16]], ptr [[ATOMIC_TEMP19]], align 2
-// LIBCALL-NEXT: [[CMPXCHG_DESIRED21:%.*]] = load i16, ptr [[ATOMIC_TEMP19]], align 2
-// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_2:%.*]] = call i8 @__atomic_compare_exchange_2(ptr [[J]], ptr [[ATOMIC_TEMP18]], i16 [[CMPXCHG_DESIRED21]], i32 5, i32 5)
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS22:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_2]], 0
-// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP20]], ptr [[ATOMIC_TEMP18]], i64 2, i1 false)
-// LIBCALL-NEXT: [[TMP9]] = load i16, ptr [[ATOMIC_TEMP20]], align 2
-// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS22]], label %[[ATOMIC_CONT17:.*]], label %[[ATOMIC_OP13]]
-// LIBCALL: [[ATOMIC_CONT17]]:
-// LIBCALL-NEXT: ret void
-//
void foo(int x)
{
_Atomic(int) i = 0;
_Atomic(short) j = 0;
// Check that multiply / divides on atomics produce a cmpxchg loop
i *= 2;
+ // NATIVE: mul nsw i32
+ // NATIVE: cmpxchg ptr {{.*}} seq_cst, align 4
+ // LIBCALL: mul nsw i32
+ // LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 4,
i /= 2;
+ // NATIVE: sdiv i32
+ // NATIVE: cmpxchg ptr {{.*}} seq_cst, align 4
+ // LIBCALL: sdiv i32
+ // LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 4,
j /= x;
+ // NATIVE: sdiv i32
+ // NATIVE: cmpxchg ptr {{.*}} seq_cst, align 2
+ // LIBCALL: sdiv i32
+ // LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 2,
}
+// LIBCALL: declare void @__atomic_load(i32, ptr, ptr, i32) [[LC_ATTRS:#[0-9]+]]
+// LIBCALL: declare i1 @__atomic_compare_exchange(i32, ptr, ptr, ptr, i32, i32) [[LC_ATTRS:#[0-9]+]]
extern _Atomic _Bool b;
-// NATIVE-LABEL: define dso_local zeroext i1 @bar(
-// NATIVE-SAME: ) #[[ATTR0]] {
-// NATIVE-NEXT: [[ENTRY:.*:]]
-// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr @b seq_cst, align 1
-// NATIVE-NEXT: [[LOADEDV:%.*]] = trunc i8 [[ATOMIC_LOAD]] to i1
-// NATIVE-NEXT: ret i1 [[LOADEDV]]
-//
-// LIBCALL-LABEL: define dso_local zeroext i1 @bar(
-// LIBCALL-SAME: ) #[[ATTR0]] {
-// LIBCALL-NEXT: [[ENTRY:.*:]]
-// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// LIBCALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// LIBCALL-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// LIBCALL-NEXT: ret i1 [[LOADEDV]]
-//
_Bool bar(void) {
+// NATIVE-LABEL: @bar
+// NATIVE: %[[load:.*]] = load atomic i8, ptr @b seq_cst, align 1
+// NATIVE: %[[tobool:.*]] = trunc i8 %[[load]] to i1
+// NATIVE: ret i1 %[[tobool]]
+// LIBCALL-LABEL: @bar
+// LIBCALL: call void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef %atomic-temp, i32 noundef 5)
+// LIBCALL: %[[load:.*]] = load i8, ptr %atomic-temp
+// LIBCALL: %[[tobool:.*]] = trunc i8 %[[load]] to i1
+// LIBCALL: ret i1 %[[tobool]]
return b;
}
extern _Atomic(_Complex int) x;
-// NATIVE-LABEL: define dso_local void @baz(
-// NATIVE-SAME: i32 noundef [[Y:%.*]]) #[[ATTR0]] {
-// NATIVE-NEXT: [[ENTRY:.*:]]
-// NATIVE-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { i32, i32 }, align 8
-// NATIVE-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { i32, i32 }, align 8
-// NATIVE-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
-// NATIVE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y_ADDR]], align 4
-// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, ptr @x seq_cst, align 8
-// NATIVE-NEXT: store i64 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 8
-// NATIVE-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 0
-// NATIVE-NEXT: [[ATOMIC_TEMP_REAL:%.*]] = load i32, ptr [[ATOMIC_TEMP_REALP]], align 8
-// NATIVE-NEXT: [[ATOMIC_TEMP_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 1
-// NATIVE-NEXT: [[ATOMIC_TEMP_IMAG:%.*]] = load i32, ptr [[ATOMIC_TEMP_IMAGP]], align 4
-// NATIVE-NEXT: [[ADD_R:%.*]] = add i32 [[ATOMIC_TEMP_REAL]], [[TMP0]]
-// NATIVE-NEXT: [[ADD_I:%.*]] = add i32 [[ATOMIC_TEMP_IMAG]], 0
-// NATIVE-NEXT: [[ATOMIC_TEMP1_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 0
-// NATIVE-NEXT: [[ATOMIC_TEMP1_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 1
-// NATIVE-NEXT: store i32 [[ADD_R]], ptr [[ATOMIC_TEMP1_REALP]], align 8
-// NATIVE-NEXT: store i32 [[ADD_I]], ptr [[ATOMIC_TEMP1_IMAGP]], align 4
-// NATIVE-NEXT: [[TMP1:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
-// NATIVE-NEXT: store atomic i64 [[TMP1]], ptr @x seq_cst, align 8
-// NATIVE-NEXT: ret void
-//
-// LIBCALL-LABEL: define dso_local void @baz(
-// LIBCALL-SAME: i32 noundef [[Y:%.*]]) #[[ATTR0]] {
-// LIBCALL-NEXT: [[ENTRY:.*:]]
-// LIBCALL-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { i32, i32 }, align 8
-// LIBCALL-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { i32, i32 }, align 8
-// LIBCALL-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
-// LIBCALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y_ADDR]], align 4
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 8, ptr noundef @x, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// LIBCALL-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 0
-// LIBCALL-NEXT: [[ATOMIC_TEMP_REAL:%.*]] = load i32, ptr [[ATOMIC_TEMP_REALP]], align 8
-// LIBCALL-NEXT: [[ATOMIC_TEMP_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP]], i32 0, i32 1
-// LIBCALL-NEXT: [[ATOMIC_TEMP_IMAG:%.*]] = load i32, ptr [[ATOMIC_TEMP_IMAGP]], align 4
-// LIBCALL-NEXT: [[ADD_R:%.*]] = add i32 [[ATOMIC_TEMP_REAL]], [[TMP0]]
-// LIBCALL-NEXT: [[ADD_I:%.*]] = add i32 [[ATOMIC_TEMP_IMAG]], 0
-// LIBCALL-NEXT: [[ATOMIC_TEMP1_REALP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 0
-// LIBCALL-NEXT: [[ATOMIC_TEMP1_IMAGP:%.*]] = getelementptr inbounds { i32, i32 }, ptr [[ATOMIC_TEMP1]], i32 0, i32 1
-// LIBCALL-NEXT: store i32 [[ADD_R]], ptr [[ATOMIC_TEMP1_REALP]], align 8
-// LIBCALL-NEXT: store i32 [[ADD_I]], ptr [[ATOMIC_TEMP1_IMAGP]], align 4
-// LIBCALL-NEXT: call void @__atomic_store(i32 noundef 8, ptr noundef @x, ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5)
-// LIBCALL-NEXT: ret void
-//
void baz(int y) {
+// NATIVE-LABEL: @baz
+// NATIVE: store atomic i64 {{.*}} seq_cst, align 8
+// LIBCALL-LABEL: @baz
+// LIBCALL: call void @__atomic_store
x += y;
}
+// LIBCALL: declare void @__atomic_store(i32, ptr, ptr, i32) [[LC_ATTRS:#[0-9]+]]
-// CHECK-LABEL: define dso_local i32 @compound_add(
-// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[IN_ADDR]], i32 5 seq_cst, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 5
-// CHECK-NEXT: ret i32 [[TMP1]]
-//
_Atomic(int) compound_add(_Atomic(int) in) {
+// CHECK-LABEL: @compound_add
+// CHECK: [[OLD:%.*]] = atomicrmw add ptr {{.*}}, i32 5 seq_cst, align 4
+// CHECK: [[NEW:%.*]] = add i32 [[OLD]], 5
+// CHECK: ret i32 [[NEW]]
return (in += 5);
}
-// CHECK-LABEL: define dso_local i32 @compound_sub(
-// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr [[IN_ADDR]], i32 5 seq_cst, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 5
-// CHECK-NEXT: ret i32 [[TMP1]]
-//
_Atomic(int) compound_sub(_Atomic(int) in) {
+// CHECK-LABEL: @compound_sub
+// CHECK: [[OLD:%.*]] = atomicrmw sub ptr {{.*}}, i32 5 seq_cst, align 4
+// CHECK: [[NEW:%.*]] = sub i32 [[OLD]], 5
+// CHECK: ret i32 [[NEW]]
return (in -= 5);
}
-// CHECK-LABEL: define dso_local i32 @compound_xor(
-// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[IN_ADDR]], i32 5 seq_cst, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP0]], 5
-// CHECK-NEXT: ret i32 [[TMP1]]
-//
_Atomic(int) compound_xor(_Atomic(int) in) {
+// CHECK-LABEL: @compound_xor
+// CHECK: [[OLD:%.*]] = atomicrmw xor ptr {{.*}}, i32 5 seq_cst, align 4
+// CHECK: [[NEW:%.*]] = xor i32 [[OLD]], 5
+// CHECK: ret i32 [[NEW]]
return (in ^= 5);
}
-// CHECK-LABEL: define dso_local i32 @compound_or(
-// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[IN_ADDR]], i32 5 seq_cst, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], 5
-// CHECK-NEXT: ret i32 [[TMP1]]
-//
_Atomic(int) compound_or(_Atomic(int) in) {
+// CHECK-LABEL: @compound_or
+// CHECK: [[OLD:%.*]] = atomicrmw or ptr {{.*}}, i32 5 seq_cst, align 4
+// CHECK: [[NEW:%.*]] = or i32 [[OLD]], 5
+// CHECK: ret i32 [[NEW]]
return (in |= 5);
}
-// CHECK-LABEL: define dso_local i32 @compound_and(
-// CHECK-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[IN_ADDR]], i32 5 seq_cst, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 5
-// CHECK-NEXT: ret i32 [[TMP1]]
-//
_Atomic(int) compound_and(_Atomic(int) in) {
+// CHECK-LABEL: @compound_and
+// CHECK: [[OLD:%.*]] = atomicrmw and ptr {{.*}}, i32 5 seq_cst, align 4
+// CHECK: [[NEW:%.*]] = and i32 [[OLD]], 5
+// CHECK: ret i32 [[NEW]]
return (in &= 5);
}
-// NATIVE-LABEL: define dso_local i32 @compound_mul(
-// NATIVE-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
-// NATIVE-NEXT: [[ENTRY:.*]]:
-// NATIVE-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
-// NATIVE-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// NATIVE-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[IN_ADDR]] seq_cst, align 4
-// NATIVE-NEXT: br label %[[ATOMIC_OP:.*]]
-// NATIVE: [[ATOMIC_OP]]:
-// NATIVE-NEXT: [[TMP0:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[ENTRY]] ], [ [[TMP1:%.*]], %[[ATOMIC_OP]] ]
-// NATIVE-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 5
-// NATIVE-NEXT: store i32 [[TMP0]], ptr [[ATOMIC_TEMP]], align 4
-// NATIVE-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP1]], align 4
-// NATIVE-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// NATIVE-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
-// NATIVE-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[IN_ADDR]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] seq_cst seq_cst, align 4
-// NATIVE-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// NATIVE-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[ATOMIC_TEMP2]], align 4
-// NATIVE-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// NATIVE-NEXT: [[TMP1]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
-// NATIVE-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// NATIVE: [[ATOMIC_CONT]]:
-// NATIVE-NEXT: ret i32 [[MUL]]
-//
-// LIBCALL-LABEL: define dso_local i32 @compound_mul(
-// LIBCALL-SAME: i32 [[IN:%.*]]) #[[ATTR0]] {
-// LIBCALL-NEXT: [[ENTRY:.*]]:
-// LIBCALL-NEXT: [[IN_ADDR:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4
-// LIBCALL-NEXT: store i32 [[IN]], ptr [[IN_ADDR]], align 4
-// LIBCALL-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[IN_ADDR]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// LIBCALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// LIBCALL-NEXT: br label %[[ATOMIC_OP:.*]]
-// LIBCALL: [[ATOMIC_OP]]:
-// LIBCALL-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[ATOMIC_OP]] ]
-// LIBCALL-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], 5
-// LIBCALL-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
-// LIBCALL-NEXT: store i32 [[MUL]], ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
-// LIBCALL-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[IN_ADDR]], ptr [[ATOMIC_TEMP1]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// LIBCALL-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
-// LIBCALL-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 4, i1 false)
-// LIBCALL-NEXT: [[TMP2]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
-// LIBCALL-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// LIBCALL: [[ATOMIC_CONT]]:
-// LIBCALL-NEXT: ret i32 [[MUL]]
-//
_Atomic(int) compound_mul(_Atomic(int) in) {
+// NATIVE-LABEL: @compound_mul
+// NATIVE: cmpxchg ptr {{%.*}}, i32 {{%.*}}, i32 [[NEW:%.*]] seq_cst seq_cst, align 4
+// NATIVE: ret i32 [[NEW]]
+// LIBCALL-LABEL: @compound_mul
+// LIBCALL: i1 @__atomic_compare_exchange(i32 noundef 4,
return (in *= 5);
}
+// LIBCALL: [[LC_ATTRS]] = { nounwind willreturn }
diff --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c
index f48313941e329f..811820b67fbdbf 100644
--- a/clang/test/CodeGen/c11atomics-ios.c
+++ b/clang/test/CodeGen/c11atomics-ios.c
@@ -1,77 +1,264 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-apple-ios -std=c11 | FileCheck %s
// There isn't really anything special about iOS; it just happens to
// only deploy on processors with native atomics support, so it's a good
// way to test those code-paths.
+// CHECK-LABEL: define{{.*}} void @testFloat(ptr
void testFloat(_Atomic(float) *fp) {
+// CHECK: [[FP:%.*]] = alloca ptr
+// CHECK-NEXT: [[X:%.*]] = alloca float
+// CHECK-NEXT: [[F:%.*]] = alloca float
+// CHECK-NEXT: store ptr {{%.*}}, ptr [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]], align 4
__c11_atomic_init(fp, 1.0f);
+// CHECK-NEXT: store float 2.000000e+00, ptr [[X]], align 4
_Atomic(float) x = 2.0f;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T2:%.*]] = load atomic float, ptr [[T0]] seq_cst, align 4
+// CHECK-NEXT: store float [[T2]], ptr [[F]]
float f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = load float, ptr [[F]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load ptr, ptr [[FP]], align 4
+// CHECK-NEXT: store atomic float [[T0]], ptr [[T1]] seq_cst, align 4
*fp = f;
+// CHECK-NEXT: ret void
}
+// CHECK: define{{.*}} void @testComplexFloat(ptr
void testComplexFloat(_Atomic(_Complex float) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[CF:{ float, float }]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: store ptr
+
+// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 1
+// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
__c11_atomic_init(fp, 1.0f);
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store float 2.000000e+00, ptr [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
_Atomic(_Complex float) x = 2.0f;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64, ptr [[T0]] seq_cst, align 8
+// CHECK-NEXT: store i64 [[T2]], ptr [[TMP0]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], ptr [[T0]]
+// CHECK-NEXT: store float [[I]], ptr [[T1]]
_Complex float f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[DEST:%.*]] = load ptr, ptr [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], ptr [[T0]]
+// CHECK-NEXT: store float [[I]], ptr [[T1]]
+// CHECK-NEXT: [[T1:%.*]] = load i64, ptr [[TMP1]], align 8
+// CHECK-NEXT: store atomic i64 [[T1]], ptr [[DEST]] seq_cst, align 8
*fp = f;
+// CHECK-NEXT: ret void
}
typedef struct { short x, y, z, w; } S;
+// CHECK: define{{.*}} void @testStruct(ptr
void testStruct(_Atomic(S) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[S:.*]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
+// CHECK-NEXT: store ptr
+
+// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
__c11_atomic_init(fp, (S){1,2,3,4});
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
_Atomic(S) x = (S){1,2,3,4};
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64, ptr [[T0]] seq_cst, align 8
+// CHECK-NEXT: store i64 [[T2]], ptr [[F]], align 2
S f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 2 [[F]], i32 8, i1 false)
+// CHECK-NEXT: [[T4:%.*]] = load i64, ptr [[TMP0]], align 8
+// CHECK-NEXT: store atomic i64 [[T4]], ptr [[T0]] seq_cst, align 8
*fp = f;
+// CHECK-NEXT: ret void
}
typedef struct { short x, y, z; } PS;
+// CHECK: define{{.*}} void @testPromotedStruct(ptr
void testPromotedStruct(_Atomic(PS) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[APS:.*]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: store ptr
+
+// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
__c11_atomic_init(fp, (PS){1,2,3});
+// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[X]], i8 0, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
_Atomic(PS) x = (PS){1,2,3};
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64, ptr [[T0]] seq_cst, align 8
+// CHECK-NEXT: store i64 [[T2]], ptr [[TMP0]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[F]], ptr align 8 [[T0]], i32 6, i1 false)
PS f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[TMP1]], i8 0, i32 8, i1 false)
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[T1]], ptr align 2 [[F]], i32 6, i1 false)
+// CHECK-NEXT: [[T5:%.*]] = load i64, ptr [[TMP1]], align 8
+// CHECK-NEXT: store atomic i64 [[T5]], ptr [[T0]] seq_cst, align 8
*fp = f;
+// CHECK-NEXT: ret void
}
PS test_promoted_load(_Atomic(PS) *addr) {
+ // CHECK-LABEL: @test_promoted_load(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr)
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[VAL:%.*]] = load atomic i64, ptr [[ADDR]] seq_cst, align 8
+ // CHECK: store i64 [[VAL]], ptr [[ATOMIC_RES]], align 8
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
return __c11_atomic_load(addr, 5);
}
void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+ // CHECK-LABEL: @test_promoted_store(ptr noundef %addr, ptr noundef %val)
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+ // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
+ // CHECK: [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
+ // CHECK: store atomic i64 [[VAL64]], ptr [[ADDR]] seq_cst, align 8
__c11_atomic_store(addr, *val, 5);
}
PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+ // CHECK-LABEL: @test_promoted_exchange(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr, ptr noundef %val)
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+ // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
+ // CHECK: [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
+ // CHECK: [[RES:%.*]] = atomicrmw xchg ptr [[ADDR]], i64 [[VAL64]] seq_cst, align 8
+ // CHECK: store i64 [[RES]], ptr [[ATOMIC_RES]], align 8
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
return __c11_atomic_exchange(addr, *val, 5);
}
_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
-
-
+ // CHECK: define{{.*}} zeroext i1 @test_promoted_cmpxchg(ptr noundef %addr, ptr noundef %desired, ptr noundef %new) #0 {
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[DESIRED_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NEW_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+ // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: [[RES_ADDR:%.*]] = alloca i8, align 1
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: store ptr %desired, ptr [[DESIRED_ARG]], align 4
+ // CHECK: store ptr %new, ptr [[NEW_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[DESIRED:%.*]] = load ptr, ptr [[DESIRED_ARG]], align 4
+ // CHECK: [[NEW:%.*]] = load ptr, ptr [[NEW_ARG]], align 4
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[NEW]], i32 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_DESIRED:%.*]], ptr align 2 [[DESIRED]], i64 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_NEW]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
+ // CHECK: [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, ptr [[ATOMIC_DESIRED:%.*]], align 8
+ // CHECK: [[ATOMIC_NEW_VAL64:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 8
+ // CHECK: [[RES:%.*]] = cmpxchg ptr [[ADDR]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst, align 8
+ // CHECK: [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
+ // CHECK: [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
+ // CHECK: br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}
+
+ // CHECK: store i64 [[RES_VAL64]], ptr [[ATOMIC_DESIRED]], align 8
+ // CHECK: br label {{%.*}}
+
+ // CHECK: [[RES_BOOL8:%.*]] = zext i1 [[RES_BOOL]] to i8
+ // CHECK: store i8 [[RES_BOOL8]], ptr [[RES_ADDR]], align 1
+ // CHECK: [[RES_BOOL8:%.*]] = load i8, ptr [[RES_ADDR]], align 1
+ // CHECK: [[RETVAL:%.*]] = trunc i8 [[RES_BOOL8]] to i1
+ // CHECK: ret i1 [[RETVAL]]
return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
}
@@ -79,11 +266,15 @@ _Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
struct Empty {};
struct Empty testEmptyStructLoad(_Atomic(struct Empty)* empty) {
+ // CHECK-LABEL: @testEmptyStructLoad(
+ // CHECK-NOT: @__atomic_load
+ // CHECK: load atomic i8, ptr %{{.*}} seq_cst, align 1
return *empty;
}
void testEmptyStructStore(_Atomic(struct Empty)* empty, struct Empty value) {
+ // CHECK-LABEL: @testEmptyStructStore(
+ // CHECK-NOT: @__atomic_store
+ // CHECK: store atomic i8 %{{.*}}, ptr %{{.*}} seq_cst, align 1
*empty = value;
}
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index 8556e3b940661a..4da36ad4da0f92 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s
// Test that we are generating atomicrmw instructions, rather than
@@ -18,13 +17,17 @@ struct elem;
struct ptr {
struct elem *ptr;
};
+// CHECK-DAG: %struct.ptr = type { ptr }
struct elem {
_Atomic(struct ptr) link;
};
struct ptr object;
+// CHECK-DAG: @object ={{.*}} global %struct.ptr zeroinitializer
+// CHECK-DAG: @testStructGlobal ={{.*}} global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
+// CHECK-DAG: @testPromotedStructGlobal ={{.*}} global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
typedef int __attribute__((vector_size(16))) vector;
@@ -37,647 +40,386 @@ _Atomic(char*) p;
_Atomic(float) f;
_Atomic(vector) v;
-// CHECK-LABEL: define dso_local arm_aapcscc void @testinc(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr @b, i8 1 seq_cst, align 1
-// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @i, i32 1 seq_cst, align 4
-// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add ptr @l, i64 1 seq_cst, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw add ptr @s, i16 1 seq_cst, align 2
-// CHECK-NEXT: store atomic i8 1, ptr @b seq_cst, align 1
-// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw add ptr @i, i32 1 seq_cst, align 4
-// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw add ptr @l, i64 1 seq_cst, align 8
-// CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw add ptr @s, i16 1 seq_cst, align 2
-// CHECK-NEXT: [[TMP9:%.*]] = add i16 [[TMP8]], 1
-// CHECK-NEXT: ret void
-//
+// CHECK: testinc
void testinc(void)
{
// Special case for suffix bool++, sets to true and returns the old value.
+ // CHECK: atomicrmw xchg ptr @b, i8 1 seq_cst, align 1
b++;
+ // CHECK: atomicrmw add ptr @i, i32 1 seq_cst, align 4
i++;
+ // CHECK: atomicrmw add ptr @l, i64 1 seq_cst, align 8
l++;
+ // CHECK: atomicrmw add ptr @s, i16 1 seq_cst, align 2
s++;
// Prefix increment
// Special case for bool: set to true and return true
+ // CHECK: store atomic i8 1, ptr @b seq_cst, align 1
++b;
// Currently, we have no variant of atomicrmw that returns the new value, so
// we have to generate an atomic add, which returns the old value, and then a
// non-atomic add.
+ // CHECK: atomicrmw add ptr @i, i32 1 seq_cst, align 4
+ // CHECK: add i32
++i;
+ // CHECK: atomicrmw add ptr @l, i64 1 seq_cst, align 8
+ // CHECK: add i64
++l;
+ // CHECK: atomicrmw add ptr @s, i16 1 seq_cst, align 2
+ // CHECK: add i16
++s;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testdec(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*]]:
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP13:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i8, align 1
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
-// CHECK: [[ATOMIC_OP]]:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV5:%.*]], %[[ATOMIC_OP]] ]
-// CHECK-NEXT: [[DEC:%.*]] = add i8 [[TMP1]], -1
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 1
-// CHECK-NEXT: store i8 [[DEC]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP1]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP3]], ptr [[ATOMIC_TEMP1]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[LOADEDV4:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV5]] = zext i1 [[LOADEDV4]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// CHECK: [[ATOMIC_CONT]]:
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw sub ptr @i, i32 1 seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw sub ptr @l, i64 1 seq_cst, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw sub ptr @s, i16 1 seq_cst, align 2
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP6]], i32 noundef 5)
-// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ATOMIC_TEMP6]], align 1
-// CHECK-NEXT: [[LOADEDV7:%.*]] = trunc i8 [[TMP6]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV7]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP8:.*]]
-// CHECK: [[ATOMIC_OP8]]:
-// CHECK-NEXT: [[TMP7:%.*]] = phi i8 [ [[STOREDV9]], %[[ATOMIC_CONT]] ], [ [[STOREDV19:%.*]], %[[ATOMIC_OP8]] ]
-// CHECK-NEXT: [[DEC10:%.*]] = add i8 [[TMP7]], -1
-// CHECK-NEXT: store i8 [[TMP7]], ptr [[ATOMIC_TEMP12]], align 1
-// CHECK-NEXT: store i8 [[DEC10]], ptr [[ATOMIC_TEMP13]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED15:%.*]] = load i8, ptr [[ATOMIC_TEMP13]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_116:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP12]], i8 [[CMPXCHG_DESIRED15]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS17:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_116]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP14]], ptr [[ATOMIC_TEMP12]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ATOMIC_TEMP14]], align 1
-// CHECK-NEXT: [[LOADEDV18:%.*]] = trunc i8 [[TMP8]] to i1
-// CHECK-NEXT: [[STOREDV19]] = zext i1 [[LOADEDV18]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS17]], label %[[ATOMIC_CONT11:.*]], label %[[ATOMIC_OP8]]
-// CHECK: [[ATOMIC_CONT11]]:
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw sub ptr @i, i32 1 seq_cst, align 4
-// CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
-// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw sub ptr @l, i64 1 seq_cst, align 8
-// CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP11]], 1
-// CHECK-NEXT: [[TMP13:%.*]] = atomicrmw sub ptr @s, i16 1 seq_cst, align 2
-// CHECK-NEXT: [[TMP14:%.*]] = sub i16 [[TMP13]], 1
-// CHECK-NEXT: ret void
-//
+// CHECK: testdec
void testdec(void)
{
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
b--;
+ // CHECK: atomicrmw sub ptr @i, i32 1 seq_cst, align 4
i--;
+ // CHECK: atomicrmw sub ptr @l, i64 1 seq_cst, align 8
l--;
+ // CHECK: atomicrmw sub ptr @s, i16 1 seq_cst, align 2
s--;
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
--b;
+ // CHECK: atomicrmw sub ptr @i, i32 1 seq_cst, align 4
+ // CHECK: sub i32
--i;
+ // CHECK: atomicrmw sub ptr @l, i64 1 seq_cst, align 8
+ // CHECK: sub i64
--l;
+ // CHECK: atomicrmw sub ptr @s, i16 1 seq_cst, align 2
+ // CHECK: sub i16
--s;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testaddeq(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*]]:
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
-// CHECK: [[ATOMIC_OP]]:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
-// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 42
-// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
-// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// CHECK: [[ATOMIC_CONT]]:
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw add ptr @i, i32 42 seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 42
-// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw add ptr @l, i64 42 seq_cst, align 8
-// CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 42
-// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw add ptr @s, i16 42 seq_cst, align 2
-// CHECK-NEXT: [[TMP8:%.*]] = add i16 [[TMP7]], 42
-// CHECK-NEXT: ret void
-//
+// CHECK: testaddeq
void testaddeq(void)
{
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
+ // CHECK: atomicrmw add ptr @i, i32 42 seq_cst, align 4
+ // CHECK: atomicrmw add ptr @l, i64 42 seq_cst, align 8
+ // CHECK: atomicrmw add ptr @s, i16 42 seq_cst, align 2
b += 42;
i += 42;
l += 42;
s += 42;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testsubeq(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*]]:
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
-// CHECK: [[ATOMIC_OP]]:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
-// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], 42
-// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[SUB]] to i8
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
-// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// CHECK: [[ATOMIC_CONT]]:
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw sub ptr @i, i32 42 seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 42
-// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw sub ptr @l, i64 42 seq_cst, align 8
-// CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 42
-// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr @s, i16 42 seq_cst, align 2
-// CHECK-NEXT: [[TMP8:%.*]] = sub i16 [[TMP7]], 42
-// CHECK-NEXT: ret void
-//
+// CHECK: testsubeq
void testsubeq(void)
{
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
+ // CHECK: atomicrmw sub ptr @i, i32 42 seq_cst, align 4
+ // CHECK: atomicrmw sub ptr @l, i64 42 seq_cst, align 8
+ // CHECK: atomicrmw sub ptr @s, i16 42 seq_cst, align 2
b -= 42;
i -= 42;
l -= 42;
s -= 42;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testxoreq(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*]]:
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
-// CHECK: [[ATOMIC_OP]]:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
-// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-// CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 42
-// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
-// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// CHECK: [[ATOMIC_CONT]]:
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xor ptr @i, i32 42 seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP3]], 42
-// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xor ptr @l, i64 42 seq_cst, align 8
-// CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 42
-// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw xor ptr @s, i16 42 seq_cst, align 2
-// CHECK-NEXT: [[TMP8:%.*]] = xor i16 [[TMP7]], 42
-// CHECK-NEXT: ret void
-//
+// CHECK: testxoreq
void testxoreq(void)
{
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
+ // CHECK: atomicrmw xor ptr @i, i32 42 seq_cst, align 4
+ // CHECK: atomicrmw xor ptr @l, i64 42 seq_cst, align 8
+ // CHECK: atomicrmw xor ptr @s, i16 42 seq_cst, align 2
b ^= 42;
i ^= 42;
l ^= 42;
s ^= 42;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testoreq(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*]]:
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
-// CHECK: [[ATOMIC_OP]]:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
-// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-// CHECK-NEXT: [[OR:%.*]] = or i32 [[CONV]], 42
-// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[OR]] to i8
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
-// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// CHECK: [[ATOMIC_CONT]]:
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw or ptr @i, i32 42 seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], 42
-// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw or ptr @l, i64 42 seq_cst, align 8
-// CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 42
-// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw or ptr @s, i16 42 seq_cst, align 2
-// CHECK-NEXT: [[TMP8:%.*]] = or i16 [[TMP7]], 42
-// CHECK-NEXT: ret void
-//
+// CHECK: testoreq
void testoreq(void)
{
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
+ // CHECK: atomicrmw or ptr @i, i32 42 seq_cst, align 4
+ // CHECK: atomicrmw or ptr @l, i64 42 seq_cst, align 8
+ // CHECK: atomicrmw or ptr @s, i16 42 seq_cst, align 2
b |= 42;
i |= 42;
l |= 42;
s |= 42;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testandeq(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*]]:
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i8, align 1
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef @b, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: br label %[[ATOMIC_OP:.*]]
-// CHECK: [[ATOMIC_OP]]:
-// CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[STOREDV]], %[[ENTRY]] ], [ [[STOREDV6:%.*]], %[[ATOMIC_OP]] ]
-// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-// CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV]], 42
-// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[AND]] to i8
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP2]], align 1
-// CHECK-NEXT: store i8 [[CONV1]], ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i8, ptr [[ATOMIC_TEMP3]], align 1
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_1:%.*]] = call i8 @__atomic_compare_exchange_1(ptr @b, ptr [[ATOMIC_TEMP2]], i8 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_1]], 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ATOMIC_TEMP4]], ptr [[ATOMIC_TEMP2]], i64 1, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP4]], align 1
-// CHECK-NEXT: [[LOADEDV5:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV6]] = zext i1 [[LOADEDV5]] to i8
-// CHECK-NEXT: br i1 [[CMPXCHG_SUCCESS]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
-// CHECK: [[ATOMIC_CONT]]:
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw and ptr @i, i32 42 seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 42
-// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw and ptr @l, i64 42 seq_cst, align 8
-// CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 42
-// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw and ptr @s, i16 42 seq_cst, align 2
-// CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 42
-// CHECK-NEXT: ret void
-//
+// CHECK: testandeq
void testandeq(void)
{
+ // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 1, ptr noundef @b
+ // CHECK: atomicrmw and ptr @i, i32 42 seq_cst, align 4
+ // CHECK: atomicrmw and ptr @l, i64 42 seq_cst, align 8
+ // CHECK: atomicrmw and ptr @s, i16 42 seq_cst, align 2
b &= 42;
i &= 42;
l &= 42;
s &= 42;
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testFloat(
-// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
-// CHECK-NEXT: [[F:%.*]] = alloca float, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca float, align 4
-// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: store float 1.000000e+00, ptr [[TMP0]], align 4
-// CHECK-NEXT: store float 2.000000e+00, ptr [[X]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 4, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: store float [[TMP2]], ptr [[F]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: store float [[TMP3]], ptr [[ATOMIC_TEMP1]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 4, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5)
-// CHECK-NEXT: ret void
-//
+// CHECK-LABEL: define{{.*}} arm_aapcscc void @testFloat(ptr
void testFloat(_Atomic(float) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr
+// CHECK-NEXT: [[X:%.*]] = alloca float
+// CHECK-NEXT: [[F:%.*]] = alloca float
+// CHECK-NEXT: [[TMP0:%.*]] = alloca float
+// CHECK-NEXT: [[TMP1:%.*]] = alloca float
+// CHECK-NEXT: store ptr {{%.*}}, ptr [[FP]]
+
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]], align 4
__c11_atomic_init(fp, 1.0f);
+// CHECK-NEXT: store float 2.000000e+00, ptr [[X]], align 4
_Atomic(float) x = 2.0f;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 4, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
+// CHECK-NEXT: [[T3:%.*]] = load float, ptr [[TMP0]], align 4
+// CHECK-NEXT: store float [[T3]], ptr [[F]]
float f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = load float, ptr [[F]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load ptr, ptr [[FP]], align 4
+// CHECK-NEXT: store float [[T0]], ptr [[TMP1]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 4, ptr noundef [[T1]], ptr noundef [[TMP1]], i32 noundef 5)
*fp = f;
+// CHECK-NEXT: ret void
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @testComplexFloat(
-// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X:%.*]] = alloca { float, float }, align 8
-// CHECK-NEXT: [[F:%.*]] = alloca { float, float }, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { float, float }, align 8
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca { float, float }, align 8
-// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT: store float 1.000000e+00, ptr [[DOTREALP]], align 8
-// CHECK-NEXT: store float 0.000000e+00, ptr [[DOTIMAGP]], align 4
-// CHECK-NEXT: [[X_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[X]], i32 0, i32 0
-// CHECK-NEXT: [[X_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[X]], i32 0, i32 1
-// CHECK-NEXT: store float 2.000000e+00, ptr [[X_REALP]], align 8
-// CHECK-NEXT: store float 0.000000e+00, ptr [[X_IMAGP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP]], i32 0, i32 0
-// CHECK-NEXT: [[ATOMIC_TEMP_REAL:%.*]] = load float, ptr [[ATOMIC_TEMP_REALP]], align 8
-// CHECK-NEXT: [[ATOMIC_TEMP_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP]], i32 0, i32 1
-// CHECK-NEXT: [[ATOMIC_TEMP_IMAG:%.*]] = load float, ptr [[ATOMIC_TEMP_IMAGP]], align 4
-// CHECK-NEXT: [[F_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 0
-// CHECK-NEXT: [[F_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 1
-// CHECK-NEXT: store float [[ATOMIC_TEMP_REAL]], ptr [[F_REALP]], align 4
-// CHECK-NEXT: store float [[ATOMIC_TEMP_IMAG]], ptr [[F_IMAGP]], align 4
-// CHECK-NEXT: [[F_REALP1:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 0
-// CHECK-NEXT: [[F_REAL:%.*]] = load float, ptr [[F_REALP1]], align 4
-// CHECK-NEXT: [[F_IMAGP2:%.*]] = getelementptr inbounds { float, float }, ptr [[F]], i32 0, i32 1
-// CHECK-NEXT: [[F_IMAG:%.*]] = load float, ptr [[F_IMAGP2]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[ATOMIC_TEMP3_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP3]], i32 0, i32 0
-// CHECK-NEXT: [[ATOMIC_TEMP3_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP3]], i32 0, i32 1
-// CHECK-NEXT: store float [[F_REAL]], ptr [[ATOMIC_TEMP3_REALP]], align 8
-// CHECK-NEXT: store float [[F_IMAG]], ptr [[ATOMIC_TEMP3_IMAGP]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[TMP2]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
-// CHECK-NEXT: ret void
-//
+// CHECK: define{{.*}} arm_aapcscc void @testComplexFloat(ptr
void testComplexFloat(_Atomic(_Complex float) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[CF:{ float, float }]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: store ptr
+
+// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[P]], i32 0, i32 1
+// CHECK-NEXT: store float 1.000000e+00, ptr [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
__c11_atomic_init(fp, 1.0f);
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store float 2.000000e+00, ptr [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, ptr [[T1]]
_Atomic(_Complex float) x = 2.0f;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], ptr [[T0]]
+// CHECK-NEXT: store float [[I]], ptr [[T1]]
_Complex float f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[F]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, ptr [[T0]]
+// CHECK-NEXT: [[DEST:%.*]] = load ptr, ptr [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], ptr [[T0]]
+// CHECK-NEXT: store float [[I]], ptr [[T1]]
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[DEST]], ptr noundef [[TMP1]], i32 noundef 5)
*fp = f;
+// CHECK-NEXT: ret void
}
typedef struct { short x, y, z, w; } S;
_Atomic S testStructGlobal = (S){1, 2, 3, 4};
-// CHECK-LABEL: define dso_local arm_aapcscc void @testStruct(
-// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X1:%.*]] = alloca [[STRUCT_S:%.*]], align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT_S]], align 2
-// CHECK-NEXT: [[AGG_TMP_ENSURED:%.*]] = alloca [[STRUCT_S]], align 8
-// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[X]], align 8
-// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[Y]], align 2
-// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[Z]], align 4
-// CHECK-NEXT: [[W:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 3
-// CHECK-NEXT: store i16 4, ptr [[W]], align 2
-// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[X2]], align 8
-// CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[Y3]], align 2
-// CHECK-NEXT: [[Z4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[Z4]], align 4
-// CHECK-NEXT: [[W5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X1]], i32 0, i32 3
-// CHECK-NEXT: store i16 4, ptr [[W5]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP1]], ptr noundef [[F]], i32 noundef 5)
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_TMP_ENSURED]], ptr align 2 [[F]], i32 8, i1 false)
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[TMP2]], ptr noundef [[AGG_TMP_ENSURED]], i32 noundef 5)
-// CHECK-NEXT: ret void
-//
+// CHECK: define{{.*}} arm_aapcscc void @testStruct(ptr
void testStruct(_Atomic(S) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[S:.*]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
+// CHECK-NEXT: store ptr
+
+// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[P]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
__c11_atomic_init(fp, (S){1,2,3,4});
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T0]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T0]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], ptr [[X]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, ptr [[T0]], align 2
_Atomic(S) x = (S){1,2,3,4};
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[F]], i32 noundef 5)
S f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 2 [[F]], i32 8, i1 false)
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
*fp = f;
+// CHECK-NEXT: ret void
}
typedef struct { short x, y, z; } PS;
_Atomic PS testPromotedStructGlobal = (PS){1, 2, 3};
-// CHECK-LABEL: define dso_local arm_aapcscc void @testPromotedStruct(
-// CHECK-SAME: ptr noundef [[FP:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[X1:%.*]] = alloca { [[STRUCT_PS:%.*]], [2 x i8] }, align 8
-// CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT_PS]], align 2
-// CHECK-NEXT: [[ATOMIC_TO_NONATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: [[AGG_TMP_ENSURED:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_PS]], align 2
-// CHECK-NEXT: [[ATOMIC_TO_NONATOMIC_TEMP5:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: store ptr [[FP]], ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 8, i1 false)
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[X]], align 8
-// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP1]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[Y]], align 2
-// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP1]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[Z]], align 4
-// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[X1]], i8 0, i32 8, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[X1]], i32 0, i32 0
-// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP2]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, ptr [[X2]], align 8
-// CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP2]], i32 0, i32 1
-// CHECK-NEXT: store i16 2, ptr [[Y3]], align 2
-// CHECK-NEXT: [[Z4:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP2]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, ptr [[Z4]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP3]], ptr noundef [[ATOMIC_TO_NONATOMIC_TEMP]], i32 noundef 5)
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[ATOMIC_TO_NONATOMIC_TEMP]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[F]], ptr align 8 [[TMP4]], i32 6, i1 false)
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[AGG_TMP_ENSURED]], i8 0, i32 8, i1 false)
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[AGG_TMP_ENSURED]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP6]], ptr align 2 [[F]], i32 6, i1 false)
-// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[TMP5]], ptr noundef [[AGG_TMP_ENSURED]], i32 noundef 5)
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[FP_ADDR]], align 4
-// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[TMP7]], ptr noundef [[ATOMIC_TO_NONATOMIC_TEMP5]], i32 noundef 5)
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_PS]], [2 x i8] }, ptr [[ATOMIC_TO_NONATOMIC_TEMP5]], i32 0, i32 0
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP]], ptr align 8 [[TMP8]], i32 6, i1 false)
-// CHECK-NEXT: [[X6:%.*]] = getelementptr inbounds [[STRUCT_PS]], ptr [[TMP]], i32 0, i32 0
-// CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[X6]], align 2
-// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
-// CHECK-NEXT: store i32 [[CONV]], ptr [[A]], align 4
-// CHECK-NEXT: ret void
-//
+// CHECK: define{{.*}} arm_aapcscc void @testPromotedStruct(ptr
void testPromotedStruct(_Atomic(PS) *fp) {
-
+// CHECK: [[FP:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[APS:.*]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
+// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: store ptr
+
+// CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
__c11_atomic_init(fp, (PS){1,2,3});
+// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[X]], i8 0, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, ptr [[T1]], align 8
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, ptr [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], ptr [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, ptr [[T1]], align 4
_Atomic(PS) x = (PS){1,2,3};
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP0]], i32 noundef 5)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[F]], ptr align 8 [[T0]], i32 6, i1 false)
PS f = *fp;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
+// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[TMP1]], i8 0, i32 8, i1 false)
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[T1]], ptr align 2 [[F]], i32 6, i1 false)
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP1]], i32 noundef 5)
*fp = f;
+// CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]], align 4
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[T0]], ptr noundef [[TMP3]], i32 noundef 5)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], ptr [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP2]], ptr align 8 [[T0]], i32 6, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS, ptr [[TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = load i16, ptr [[T0]], align 2
+// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
+// CHECK-NEXT: store i32 [[T2]], ptr [[A]], align 4
int a = ((PS)*fp).x;
+// CHECK-NEXT: ret void
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @test_promoted_load(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_PS:%.*]]) align 2 [[AGG_RESULT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT]], ptr align 8 [[ATOMIC_TEMP]], i32 6, i1 false)
-// CHECK-NEXT: ret void
-//
PS test_promoted_load(_Atomic(PS) *addr) {
+ // CHECK-LABEL: @test_promoted_load(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr)
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[ATOMIC_RES:%.*]] = load atomic i64, ptr [[ADDR]] seq_cst, align 8
+ // CHECK: store i64 [[ATOMIC_RES]], ptr [[ATOMIC_RES_ADDR:%.*]], align 8
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES_ADDR]], i32 6, i1 false)
return __c11_atomic_load(addr, 5);
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @test_promoted_store(
-// CHECK-SAME: ptr noundef [[ADDR:%.*]], ptr noundef [[VAL:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_PS:%.*]], align 2
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP1]], i32 6, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[TMP0]] seq_cst, align 8
-// CHECK-NEXT: ret void
-//
void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+ // CHECK-LABEL: @test_promoted_store(ptr noundef %addr, ptr noundef %val)
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+ // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
+ // CHECK: [[ATOMIC:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
+ // CHECK: store atomic i64 [[ATOMIC]], ptr [[ADDR]] seq_cst, align 8
__c11_atomic_store(addr, *val, 5);
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @test_promoted_exchange(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_PS:%.*]]) align 2 [[AGG_RESULT:%.*]], ptr noundef [[ADDR:%.*]], ptr noundef [[VAL:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_PS]], align 2
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: store ptr [[VAL]], ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP1]], i32 6, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i64 [[TMP2]] seq_cst, align 8
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP1]], align 8
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT]], ptr align 8 [[ATOMIC_TEMP1]], i32 6, i1 false)
-// CHECK-NEXT: ret void
-//
PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+ // CHECK-LABEL: @test_promoted_exchange(ptr dead_on_unwind noalias writable sret(%struct.PS) align 2 %agg.result, ptr noundef %addr, ptr noundef %val)
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[VAL_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+ // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: store ptr %val, ptr [[VAL_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
+ // CHECK: [[ATOMIC:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
+ // CHECK: [[ATOMIC_RES:%.*]] = atomicrmw xchg ptr [[ADDR]], i64 [[ATOMIC]] seq_cst, align 8
+ // CHECK: store i64 [[ATOMIC_RES]], ptr [[ATOMIC_RES_PTR:%.*]], align 8
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES_PTR]], i32 6, i1 false)
return __c11_atomic_exchange(addr, *val, 5);
}
-// CHECK-LABEL: define dso_local arm_aapcscc zeroext i1 @test_promoted_cmpxchg(
-// CHECK-SAME: ptr noundef [[ADDR:%.*]], ptr noundef [[DESIRED:%.*]], ptr noundef [[NEW:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[NEW_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_PS:%.*]], align 2
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_PS]], [2 x i8] }, align 8
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
-// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: store ptr [[DESIRED]], ptr [[DESIRED_ADDR]], align 4
-// CHECK-NEXT: store ptr [[NEW]], ptr [[NEW_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DESIRED_ADDR]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[NEW_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[DOTATOMICTMP]], ptr align 2 [[TMP2]], i32 6, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 2 [[TMP1]], i64 6, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP1]], ptr align 2 [[DOTATOMICTMP]], i64 6, i1 false)
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_8:%.*]] = call i8 @__atomic_compare_exchange_8(ptr [[TMP0]], ptr [[ATOMIC_TEMP]], i64 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_8]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK-NEXT: ret i1 [[LOADEDV]]
-//
_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+ // CHECK-LABEL: i1 @test_promoted_cmpxchg(ptr noundef %addr, ptr noundef %desired, ptr noundef %new) #0 {
+ // CHECK: [[ADDR_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[DESIRED_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NEW_ARG:%.*]] = alloca ptr, align 4
+ // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+ // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+ // CHECK: store ptr %addr, ptr [[ADDR_ARG]], align 4
+ // CHECK: store ptr %desired, ptr [[DESIRED_ARG]], align 4
+ // CHECK: store ptr %new, ptr [[NEW_ARG]], align 4
+ // CHECK: [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
+ // CHECK: [[DESIRED:%.*]] = load ptr, ptr [[DESIRED_ARG]], align 4
+ // CHECK: [[NEW:%.*]] = load ptr, ptr [[NEW_ARG]], align 4
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[NEW]], i32 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_DESIRED]], ptr align 2 [[DESIRED]], i64 6, i1 false)
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_NEW]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
+ // CHECK: [[VAL1:%.*]] = load i64, ptr [[ATOMIC_DESIRED]], align 8
+ // CHECK: [[VAL2:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 8
+ // CHECK: [[RES_PAIR:%.*]] = cmpxchg ptr [[ADDR]], i64 [[VAL1]], i64 [[VAL2]] seq_cst seq_cst, align 8
+ // CHECK: [[RES:%.*]] = extractvalue { i64, i1 } [[RES_PAIR]], 1
return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
}
struct Empty {};
-// CHECK-LABEL: define dso_local arm_aapcscc void @test_empty_struct_load(
-// CHECK-SAME: ptr noundef [[EMPTY:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
-// CHECK-NEXT: [[EMPTY_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_EMPTY]], [1 x i8] }, align 1
-// CHECK-NEXT: store ptr [[EMPTY]], ptr [[EMPTY_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[EMPTY_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[TMP0]] seq_cst, align 1
-// CHECK-NEXT: store i8 [[TMP1]], ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 1 [[ATOMIC_TEMP]], i32 0, i1 false)
-// CHECK-NEXT: ret void
-//
struct Empty test_empty_struct_load(_Atomic(struct Empty)* empty) {
+ // CHECK-LABEL: @test_empty_struct_load(
+ // CHECK: load atomic i8, ptr {{.*}}, align 1
return __c11_atomic_load(empty, 5);
}
-// CHECK-LABEL: define dso_local arm_aapcscc void @test_empty_struct_store(
-// CHECK-SAME: ptr noundef [[EMPTY:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VALUE:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
-// CHECK-NEXT: [[EMPTY_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_EMPTY]], [1 x i8] }, align 1
-// CHECK-NEXT: store ptr [[EMPTY]], ptr [[EMPTY_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[EMPTY_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[VALUE]], i32 0, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[ATOMIC_TEMP]], ptr align 1 [[DOTATOMICTMP]], i64 0, i1 false)
-// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[TMP0]] seq_cst, align 1
-// CHECK-NEXT: ret void
-//
void test_empty_struct_store(_Atomic(struct Empty)* empty, struct Empty value) {
+ // CHECK-LABEL: @test_empty_struct_store(
+ // CHECK: store atomic i8 {{.*}}, ptr {{.*}}, align 1
__c11_atomic_store(empty, value, 5);
}
diff --git a/clang/test/CodeGen/sanitize-atomic-int-overflow.c b/clang/test/CodeGen/sanitize-atomic-int-overflow.c
index 6dacd3c5a81e63..da8152ad7aad1f 100644
--- a/clang/test/CodeGen/sanitize-atomic-int-overflow.c
+++ b/clang/test/CodeGen/sanitize-atomic-int-overflow.c
@@ -1,18 +1,33 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 -fsanitize=unsigned-integer-overflow %s -emit-llvm -o - | FileCheck %s
_Atomic(unsigned) atomic;
+// CHECK-LABEL: define{{.*}} void @cmpd_assign
void cmpd_assign(void) {
+ // CHECK: br label %[[LOOP_START:.*]]
+ // CHECK: [[LOOP_START]]:
+ // CHECK-NEXT: phi i32 {{.*}}, [ {{.*}}, %[[INCOMING_BLOCK:.*]] ]
+ // CHECK: [[INCOMING_BLOCK]]:
+ // CHECK-NEXT: cmpxchg
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: br i1 %8, label %{{.*}}, label %[[LOOP_START]]
atomic += 1;
}
+// CHECK-LABEL: define{{.*}} void @inc
void inc(void) {
+ // CHECK: br label %[[LOOP_START:.*]]
+ // CHECK: [[LOOP_START]]:
+ // CHECK-NEXT: phi i32 {{.*}}, [ {{.*}}, %[[INCOMING_BLOCK:.*]] ]
+ // CHECK: [[INCOMING_BLOCK]]:
+ // CHECK-NEXT: cmpxchg
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: extractvalue
+ // CHECK-NEXT: br i1 %8, label %{{.*}}, label %[[LOOP_START]]
atomic++;
}
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/stack-arg-probe.c b/clang/test/CodeGen/stack-arg-probe.c
index 1babaa959c7100..255ae482b68083 100644
--- a/clang/test/CodeGen/stack-arg-probe.c
+++ b/clang/test/CodeGen/stack-arg-probe.c
@@ -1,8 +1,10 @@
// RUN: %clang_cc1 %s -triple=i686-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
+// RUN: %clang_cc1 %s -triple=armv7-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
// RUN: %clang_cc1 %s -triple=aarch64-windows-msvc -emit-llvm -o - -mno-stack-arg-probe | FileCheck %s -check-prefix=NO-STACKPROBE
// RUN: %clang_cc1 %s -triple=i686-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
+// RUN: %clang_cc1 %s -triple=armv7-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
// RUN: %clang_cc1 %s -triple=aarch64-windows-msvc -emit-llvm -o - | FileCheck %s -check-prefix=STACKPROBE
diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu
index c9c787001610c8..fbc042caa809f9 100644
--- a/clang/test/CodeGenCUDA/atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/atomic-ops.cu
@@ -1,180 +1,19 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
#include "Inputs/cuda.h"
-// CHECK-LABEL: define dso_local noundef i32 @_Z24atomic32_op_singlethreadPiii(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("singlethread-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("singlethread-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load atomic i32, ptr [[TMP46]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 4
-// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP25_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 4
-// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP26_ASCAST]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP51]], ptr [[TMP49]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i32 [[COND]]
-//
+// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 4
__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -191,226 +30,28 @@ __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z25atomicu32_op_singlethreadPjjj(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: ret i32 [[TMP10]]
-//
+// CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
__device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return val;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z21atomic32_op_wavefrontPiii(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("wavefront-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("wavefront-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load atomic i32, ptr [[TMP46]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 4
-// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP25_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 4
-// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP26_ASCAST]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP51]], ptr [[TMP49]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i32 [[COND]]
-//
+// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -427,219 +68,27 @@ __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z22atomicu32_op_wavefrontPjjj(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: ret i32 [[TMP10]]
-//
+// CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
__device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
return val;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z21atomic32_op_workgroupPiii(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("workgroup-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 4
-// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTATOMICTMP25_ASCAST]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP48]], ptr [[TMP46]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i32 [[COND]]
-//
+// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -655,219 +104,27 @@ __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z22atomicu32_op_workgroupPjjj(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: ret i32 [[TMP10]]
-//
+// CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
__device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
return val;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z17atomic32_op_agentPiii(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("agent-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("agent-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 4
-// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTATOMICTMP25_ASCAST]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP48]], ptr [[TMP46]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i32 [[COND]]
-//
+// CHECK-LABEL: @_Z17atomic32_op_agentPiii
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4
__device__ int atomic32_op_agent(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -883,226 +140,28 @@ __device__ int atomic32_op_agent(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z18atomicu32_op_agentPjjj(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: ret i32 [[TMP10]]
-//
+// CHECK-LABEL: @_Z18atomicu32_op_agentPjjj
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
// Exercises the unsigned min/max HIP atomic builtins at agent scope with
// relaxed ordering; the directives above verify these lower to
// `atomicrmw umin` / `atomicrmw umax` with syncscope("agent-one-as").
// NOTE(review): `desired` is unused here — presumably kept so all the
// atomic*_op_* test functions share one signature; confirm before removing.
__device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
// Returns the result of the last fetch-max (the value previously stored at *ptr).
return val;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z18atomic32_op_systemPiii(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTATOMICTMP10_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i32 [[TMP8]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i32 [[TMP13]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i32 [[TMP18]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTATOMICTMP15_ASCAST]], align 4
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i32 [[TMP23]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP16_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTATOMICTMP17_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i32 [[TMP28]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ATOMIC_TEMP18_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTATOMICTMP19_ASCAST]], align 4
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i32 [[TMP33]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ATOMIC_TEMP20_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTATOMICTMP21_ASCAST]], align 4
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i32 [[TMP38]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP22_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTATOMICTMP23_ASCAST]], align 4
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i32 [[TMP43]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[ATOMIC_TEMP24_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load atomic i32, ptr [[TMP46]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 4
-// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP25_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 4
-// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP26_ASCAST]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP51]], ptr [[TMP49]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i32 [[COND]]
-//
+// CHECK-LABEL: @_Z18atomic32_op_systemPiii
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: load i32, ptr %{{.*}}, align 4
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 4
__device__ int atomic32_op_system(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -1119,222 +178,27 @@ __device__ int atomic32_op_system(int *ptr, int val, int desired) {
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i32 @_Z19atomicu32_op_systemPjjj(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[VAL:%.*]], i32 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i32 [[TMP2]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i32 [[TMP7]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[VAL_ADDR_ASCAST]], align 4
-// CHECK-NEXT: ret i32 [[TMP10]]
-//
+// CHECK-LABEL: @_Z19atomicu32_op_systemPjjj
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
// Exercises the unsigned 32-bit HIP atomic min/max builtins at system scope.
// Per the CHECK lines above, these are expected to lower to
// `atomicrmw umin` / `atomicrmw umax` with syncscope("one-as").
__device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) {
// Each fetch op stores the previous value of *ptr back into `val`
// (relaxed ordering). NOTE(review): `desired` is unused in this test body.
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
return val;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z24atomic64_op_singlethreadPxS_xx(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("singlethread-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("singlethread-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTATOMICTMP25_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP48]], ptr [[TMP46]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i64 [[COND]]
-//
+// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
__device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -1350,64 +214,11 @@ __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, l
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z25atomicu64_op_singlethreadPyS_yy(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load atomic i64, ptr [[TMP10]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP11]], ptr [[ATOMIC_TEMP3_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ATOMIC_TEMP3_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[DOTATOMICTMP4_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTATOMICTMP4_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP15]], ptr [[TMP13]] syncscope("singlethread-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: ret i64 [[TMP16]]
-//
+// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: load atomic i64, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -1416,182 +227,19 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr,
return val;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z21atomic64_op_wavefrontPxS_xx(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("wavefront-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("wavefront-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load atomic i64, ptr [[TMP46]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[ATOMIC_TEMP25_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 8
-// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTATOMICTMP26_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP51]], ptr [[TMP49]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP54:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i64 [[COND]]
-//
+// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
__device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -1608,64 +256,11 @@ __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z22atomicu64_op_wavefrontPyS_yy(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load atomic i64, ptr [[TMP10]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP11]], ptr [[ATOMIC_TEMP3_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ATOMIC_TEMP3_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[DOTATOMICTMP4_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTATOMICTMP4_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP15]], ptr [[TMP13]] syncscope("wavefront-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: ret i64 [[TMP16]]
-//
+// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -1674,175 +269,18 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un
return val;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z21atomic64_op_workgroupPxS_xx(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("workgroup-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTATOMICTMP25_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP48]], ptr [[TMP46]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i64 [[COND]]
-//
+// CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
__device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -1858,57 +296,10 @@ __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z22atomicu64_op_workgroupPyS_yy(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP11]], ptr [[DOTATOMICTMP3_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTATOMICTMP3_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP12]], ptr [[TMP10]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: ret i64 [[TMP13]]
-//
+// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -1916,175 +307,18 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un
return val;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z17atomic64_op_agentPxS_xx(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP25:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP25]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("agent-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("agent-one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP47]], ptr [[DOTATOMICTMP25_ASCAST]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTATOMICTMP25_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP48]], ptr [[TMP46]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV26:%.*]] = trunc i8 [[TMP49]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV26]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP50]], %[[COND_TRUE]] ], [ [[TMP51]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i64 [[COND]]
-//
+// CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8
__device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -2100,57 +334,10 @@ __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long lon
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z18atomicu64_op_agentPyS_yy(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP11]], ptr [[DOTATOMICTMP3_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTATOMICTMP3_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP12]], ptr [[TMP10]] syncscope("agent-one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: ret i64 [[TMP13]]
-//
+// CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -2158,182 +345,19 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign
return val;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z18atomic64_op_systemPxS_xx(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[FLAG:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP10:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP15:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP16:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP17:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP18:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP19:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP20:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP21:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP22:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP23:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP24:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP25:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP26:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[FLAG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAG]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP10]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP15]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP16]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP17]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP18]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP19]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP20]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP21]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP22]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP23_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP23]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP24_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP24]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP25_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP25]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP26_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP26]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] syncscope("one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i64 [[CMPXCHG_EXPECTED3]], i64 [[CMPXCHG_DESIRED4]] syncscope("one-as") monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV6]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: [[STOREDV9:%.*]] = zext i1 [[LOADEDV8]] to i8
-// CHECK-NEXT: store i8 [[STOREDV9]], ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP7]], ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTATOMICTMP10_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = atomicrmw xchg ptr [[TMP6]], i64 [[TMP8]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP10]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTATOMICTMP11_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP11]], i64 [[TMP13]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[ATOMIC_TEMP12_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP15]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP17]], ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTATOMICTMP13_ASCAST]], align 8
-// CHECK-NEXT: [[TMP19:%.*]] = atomicrmw sub ptr [[TMP16]], i64 [[TMP18]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP19]], ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[ATOMIC_TEMP14_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP20]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTATOMICTMP15_ASCAST]], align 8
-// CHECK-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP21]], i64 [[TMP23]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP24]], ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[ATOMIC_TEMP16_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP25]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP27]], ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTATOMICTMP17_ASCAST]], align 8
-// CHECK-NEXT: [[TMP29:%.*]] = atomicrmw or ptr [[TMP26]], i64 [[TMP28]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP29]], ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ATOMIC_TEMP18_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP30]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP32]], ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTATOMICTMP19_ASCAST]], align 8
-// CHECK-NEXT: [[TMP34:%.*]] = atomicrmw xor ptr [[TMP31]], i64 [[TMP33]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP34]], ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[ATOMIC_TEMP20_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP35]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP37]], ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTATOMICTMP21_ASCAST]], align 8
-// CHECK-NEXT: [[TMP39:%.*]] = atomicrmw min ptr [[TMP36]], i64 [[TMP38]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP39]], ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[ATOMIC_TEMP22_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP40]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP42]], ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTATOMICTMP23_ASCAST]], align 8
-// CHECK-NEXT: [[TMP44:%.*]] = atomicrmw max ptr [[TMP41]], i64 [[TMP43]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP44]], ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[ATOMIC_TEMP24_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP45]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP47:%.*]] = load atomic i64, ptr [[TMP46]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP47]], ptr [[ATOMIC_TEMP25_ASCAST]], align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[ATOMIC_TEMP25_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP48]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP50]], ptr [[DOTATOMICTMP26_ASCAST]], align 8
-// CHECK-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTATOMICTMP26_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP51]], ptr [[TMP49]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[FLAG_ASCAST]], align 1
-// CHECK-NEXT: [[LOADEDV27:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK-NEXT: br i1 [[LOADEDV27]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK: [[COND_TRUE]]:
-// CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END:.*]]
-// CHECK: [[COND_FALSE]]:
-// CHECK-NEXT: [[TMP54:%.*]] = load i64, ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label %[[COND_END]]
-// CHECK: [[COND_END]]:
-// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP53]], %[[COND_TRUE]] ], [ [[TMP54]], %[[COND_FALSE]] ]
-// CHECK-NEXT: ret i64 [[COND]]
-//
+// CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw sub ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: load i64, ptr %{{.*}}, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8
__device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -2350,64 +374,11 @@ __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long lo
return flag ? val : desired;
}
-// CHECK-LABEL: define dso_local noundef i64 @_Z19atomicu64_op_systemPyS_yy(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i64 noundef [[VAL:%.*]], i64 noundef [[DESIRED:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DESIRED_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
-// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
-// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
-// CHECK-NEXT: [[DESIRED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED_ADDR]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
-// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
-// CHECK-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
-// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DESIRED]], ptr [[DESIRED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTATOMICTMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umin ptr [[TMP0]], i64 [[TMP2]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[ATOMIC_TEMP_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP4]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP6]], ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTATOMICTMP1_ASCAST]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = atomicrmw umax ptr [[TMP5]], i64 [[TMP7]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP8]], ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ATOMIC_TEMP2_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP9]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = load atomic i64, ptr [[TMP10]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP11]], ptr [[ATOMIC_TEMP3_ASCAST]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ATOMIC_TEMP3_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP12]], ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP14]], ptr [[DOTATOMICTMP4_ASCAST]], align 8
-// CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTATOMICTMP4_ASCAST]], align 8
-// CHECK-NEXT: store atomic i64 [[TMP15]], ptr [[TMP13]] syncscope("one-as") monotonic, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[VAL_ADDR_ASCAST]], align 8
-// CHECK-NEXT: ret i64 [[TMP16]]
-//
+// CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: load i64, ptr %{{.*}}, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
diff --git a/clang/test/CodeGenCXX/atomic-inline.cpp b/clang/test/CodeGenCXX/atomic-inline.cpp
index dc0c30c8f76878..c8fa877a37beb5 100644
--- a/clang/test/CodeGenCXX/atomic-inline.cpp
+++ b/clang/test/CodeGenCXX/atomic-inline.cpp
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu -target-cpu core2 | FileCheck %s --check-prefix=CORE2
// Check the atomic code generation for cpu targets w/wo cx16 support.
@@ -7,85 +6,31 @@ struct alignas(8) AM8 {
int f1, f2;
};
AM8 m8;
-// CHECK-LABEL: define dso_local i64 @_Z5load8v(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load atomic i64, ptr @m8 monotonic, align 8
-// CHECK-NEXT: store i64 [[TMP0]], ptr [[RETVAL]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL]], align 8
-// CHECK-NEXT: ret i64 [[TMP1]]
-//
-// CORE2-LABEL: define dso_local i64 @_Z5load8v(
-// CORE2-SAME: ) #[[ATTR0:[0-9]+]] {
-// CORE2-NEXT: [[ENTRY:.*:]]
-// CORE2-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
-// CORE2-NEXT: [[TMP0:%.*]] = load atomic i64, ptr @m8 monotonic, align 8
-// CORE2-NEXT: store i64 [[TMP0]], ptr [[RETVAL]], align 8
-// CORE2-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL]], align 8
-// CORE2-NEXT: ret i64 [[TMP1]]
-//
AM8 load8() {
AM8 am;
+ // CHECK-LABEL: @_Z5load8v
+ // CHECK: load atomic i64, {{.*}} monotonic, align 8
+ // CORE2-LABEL: @_Z5load8v
+ // CORE2: load atomic i64, {{.*}} monotonic, align 8
__atomic_load(&m8, &am, 0);
return am;
}
AM8 s8;
-// CHECK-LABEL: define dso_local void @_Z6store8v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @s8, align 8
-// CHECK-NEXT: store atomic i64 [[TMP0]], ptr @m8 monotonic, align 8
-// CHECK-NEXT: ret void
-//
-// CORE2-LABEL: define dso_local void @_Z6store8v(
-// CORE2-SAME: ) #[[ATTR0]] {
-// CORE2-NEXT: [[ENTRY:.*:]]
-// CORE2-NEXT: [[TMP0:%.*]] = load i64, ptr @s8, align 8
-// CORE2-NEXT: store atomic i64 [[TMP0]], ptr @m8 monotonic, align 8
-// CORE2-NEXT: ret void
-//
void store8() {
+ // CHECK-LABEL: @_Z6store8v
+ // CHECK: store atomic i64 {{.*}} monotonic, align 8
+ // CORE2-LABEL: @_Z6store8v
+ // CORE2: store atomic i64 {{.*}} monotonic, align 8
__atomic_store(&m8, &s8, 0);
}
-// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z8cmpxchg8v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr @s8, align 8
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[AM]], align 8
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @m8, i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i64 [[CMPXCHG_PREV]], ptr @s8, align 8
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: ret i1 [[LOADEDV]]
-//
-// CORE2-LABEL: define dso_local noundef zeroext i1 @_Z8cmpxchg8v(
-// CORE2-SAME: ) #[[ATTR0]] {
-// CORE2-NEXT: [[ENTRY:.*:]]
-// CORE2-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM8:%.*]], align 8
-// CORE2-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
-// CORE2-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i64, ptr @s8, align 8
-// CORE2-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i64, ptr [[AM]], align 8
-// CORE2-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @m8, i64 [[CMPXCHG_EXPECTED]], i64 [[CMPXCHG_DESIRED]] monotonic monotonic, align 8
-// CORE2-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 0
-// CORE2-NEXT: store i64 [[CMPXCHG_PREV]], ptr @s8, align 8
-// CORE2-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i64, i1 } [[CMPXCHG_PAIR]], 1
-// CORE2-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
-// CORE2-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
-// CORE2-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
-// CORE2-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CORE2-NEXT: ret i1 [[LOADEDV]]
-//
bool cmpxchg8() {
AM8 am;
+ // CHECK-LABEL: @_Z8cmpxchg8v
+ // CHECK: cmpxchg ptr {{.*}} monotonic, align 8
+ // CORE2-LABEL: @_Z8cmpxchg8v
+ // CORE2: cmpxchg ptr {{.*}} monotonic, align 8
return __atomic_compare_exchange(&m8, &s8, &am, 0, 0, 0);
}
@@ -94,81 +39,30 @@ struct alignas(16) AM16 {
};
AM16 m16;
-// CHECK-LABEL: define dso_local { i64, i64 } @_Z6load16v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @m16 monotonic, align 16
-// CHECK-NEXT: store i128 [[TMP0]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = load { i64, i64 }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { i64, i64 } [[TMP1]]
-//
-// CORE2-LABEL: define dso_local { i64, i64 } @_Z6load16v(
-// CORE2-SAME: ) #[[ATTR0]] {
-// CORE2-NEXT: [[ENTRY:.*:]]
-// CORE2-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
-// CORE2-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @m16 monotonic, align 16
-// CORE2-NEXT: store i128 [[TMP0]], ptr [[RETVAL]], align 16
-// CORE2-NEXT: [[TMP1:%.*]] = load { i64, i64 }, ptr [[RETVAL]], align 16
-// CORE2-NEXT: ret { i64, i64 } [[TMP1]]
-//
AM16 load16() {
AM16 am;
+ // CHECK-LABEL: @_Z6load16v
+ // CHECK: load atomic i128, {{.*}} monotonic, align 16
+ // CORE2-LABEL: @_Z6load16v
+ // CORE2: load atomic i128, {{.*}} monotonic, align 16
__atomic_load(&m16, &am, 0);
return am;
}
AM16 s16;
-// CHECK-LABEL: define dso_local void @_Z7store16v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @s16, align 16
-// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @m16 monotonic, align 16
-// CHECK-NEXT: ret void
-//
-// CORE2-LABEL: define dso_local void @_Z7store16v(
-// CORE2-SAME: ) #[[ATTR0]] {
-// CORE2-NEXT: [[ENTRY:.*:]]
-// CORE2-NEXT: [[TMP0:%.*]] = load i128, ptr @s16, align 16
-// CORE2-NEXT: store atomic i128 [[TMP0]], ptr @m16 monotonic, align 16
-// CORE2-NEXT: ret void
-//
void store16() {
+ // CHECK-LABEL: @_Z7store16v
+ // CHECK: store atomic i128 {{.*}} monotonic, align 16
+ // CORE2-LABEL: @_Z7store16v
+ // CORE2: store atomic i128 {{.*}} monotonic, align 16
__atomic_store(&m16, &s16, 0);
}
-// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z9cmpxchg16v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr [[AM]], align 16
-// CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_16:%.*]] = call i8 @__atomic_compare_exchange_16(ptr @m16, ptr @s16, i128 [[CMPXCHG_DESIRED]], i32 0, i32 0)
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_16]], 0
-// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
-// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK-NEXT: ret i1 [[LOADEDV]]
-//
-// CORE2-LABEL: define dso_local noundef zeroext i1 @_Z9cmpxchg16v(
-// CORE2-SAME: ) #[[ATTR0]] {
-// CORE2-NEXT: [[ENTRY:.*:]]
-// CORE2-NEXT: [[AM:%.*]] = alloca [[STRUCT_AM16:%.*]], align 16
-// CORE2-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
-// CORE2-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i128, ptr @s16, align 16
-// CORE2-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr [[AM]], align 16
-// CORE2-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr @m16, i128 [[CMPXCHG_EXPECTED]], i128 [[CMPXCHG_DESIRED]] monotonic monotonic, align 16
-// CORE2-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 0
-// CORE2-NEXT: store i128 [[CMPXCHG_PREV]], ptr @s16, align 16
-// CORE2-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i128, i1 } [[CMPXCHG_PAIR]], 1
-// CORE2-NEXT: [[STOREDV:%.*]] = zext i1 [[CMPXCHG_SUCCESS]] to i8
-// CORE2-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
-// CORE2-NEXT: [[TMP0:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
-// CORE2-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// CORE2-NEXT: ret i1 [[LOADEDV]]
-//
bool cmpxchg16() {
AM16 am;
+ // CHECK-LABEL: @_Z9cmpxchg16v
+ // CHECK: cmpxchg ptr {{.*}} monotonic monotonic, align 16
+ // CORE2-LABEL: @_Z9cmpxchg16v
+ // CORE2: cmpxchg ptr {{.*}} monotonic monotonic, align 16
return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
}
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index a5e731a3fe30bc..5e2de38ac3d3e3 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \
// RUN: | FileCheck %s
@@ -36,693 +35,309 @@ typedef enum memory_scope {
atomic_int j;
-// CHECK-LABEL: define dso_local void @fi1(
-// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP3]] syncscope("agent") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[ATOMIC_TEMP1]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP1]], align 4
-// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP9]] syncscope("wavefront") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(5) [[ATOMIC_TEMP3]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP3]], align 4
-// CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: ret void
-//
void fi1(atomic_int *i) {
+ // CHECK-LABEL: @fi1
+ // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
+ // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
+ // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}
-// CHECK-LABEL: define dso_local void @fi2(
-// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: ret void
-//
void fi2(atomic_int *i) {
+ // CHECK-LABEL: @fi2
+ // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local void @test_addr(
-// CHECK-SAME: ptr addrspace(1) noundef [[IG:%.*]], ptr addrspace(5) noundef [[IP:%.*]], ptr addrspace(3) noundef [[IL:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[IG_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
-// CHECK-NEXT: [[IP_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// CHECK-NEXT: [[IL_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr addrspace(1) [[IG]], ptr addrspace(5) [[IG_ADDR]], align 8
-// CHECK-NEXT: store ptr addrspace(5) [[IP]], ptr addrspace(5) [[IP_ADDR]], align 4
-// CHECK-NEXT: store ptr addrspace(3) [[IL]], ptr addrspace(5) [[IL_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IG_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[TMP0]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[IP_ADDR]], align 4
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP3]], ptr addrspace(5) [[TMP2]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[IL_ADDR]], align 4
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP2]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP2]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP5]], ptr addrspace(3) [[TMP4]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: ret void
-//
void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
+ // CHECK-LABEL: @test_addr
+ // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(1) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(5) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(3) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local void @fi3(
-// CHECK-SAME: ptr noundef [[I:%.*]], ptr noundef [[UI:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[UI_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store ptr [[UI]], ptr addrspace(5) [[UI_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw min ptr [[TMP4]], i32 [[TMP5]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP2]], align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP3]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP3]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = atomicrmw max ptr [[TMP8]], i32 [[TMP9]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(5) [[ATOMIC_TEMP4]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP4]], align 4
-// CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr addrspace(5) [[UI_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP5]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP5]], align 4
-// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw umin ptr [[TMP12]], i32 [[TMP13]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(5) [[ATOMIC_TEMP6]], align 4
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP6]], align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr addrspace(5) [[UI_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP7]], align 4
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP7]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = atomicrmw umax ptr [[TMP16]], i32 [[TMP17]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(5) [[ATOMIC_TEMP8]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP8]], align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: ret void
-//
void fi3(atomic_int *i, atomic_uint *ui) {
+ // CHECK-LABEL: @fi3
+ // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local zeroext i1 @fi4(
-// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") acquire acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr addrspace(5) [[CMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK-NEXT: ret i1 [[LOADEDV]]
-//
bool fi4(atomic_int *i) {
+ // CHECK-LABEL: @fi4(
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4
+ // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+ // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+ // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+ // CHECK: store i32 [[OLD]]
int cmp = 0;
return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local void @fi5(
-// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
-// CHECK-NEXT: switch i32 [[TMP1]], label %[[OPENCL_ALLSVMDEVICES:.*]] [
-// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]]
-// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]]
-// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[OPENCL_WORKGROUP]]:
-// CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]]
-// CHECK: [[OPENCL_DEVICE]]:
-// CHECK-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// CHECK: [[OPENCL_ALLSVMDEVICES]]:
-// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// CHECK: [[OPENCL_SUBGROUP]]:
-// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// CHECK: [[ATOMIC_SCOPE_CONTINUE]]:
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: ret void
-//
void fi5(atomic_int *i, int scope) {
+ // CHECK-LABEL: @fi5
+ // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
+ // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
+ // CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
+ // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
+ // CHECK-NEXT: ]
+ // CHECK: [[opencl_workgroup]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: br label %[[continue:.*]]
+ // CHECK: [[opencl_device]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4
+ // CHECK: br label %[[continue]]
+ // CHECK: [[opencl_allsvmdevices]]:
+ // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4
+ // CHECK: br label %[[continue]]
+ // CHECK: [[opencl_subgroup]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4
+ // CHECK: br label %[[continue]]
+ // CHECK: [[continue]]:
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}
-// CHECK-LABEL: define dso_local void @fi6(
-// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[ORDER:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[ORDER_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: store i32 [[ORDER]], ptr addrspace(5) [[ORDER_ADDR]], align 4
-// CHECK-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ORDER_ADDR]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
-// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE]]
-// CHECK-NEXT: i32 5, label %[[SEQCST:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[MONOTONIC]]:
-// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES:.*]] [
-// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]]
-// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]]
-// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[ACQUIRE]]:
-// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES3:.*]] [
-// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP1:.*]]
-// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE2:.*]]
-// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP4:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[SEQCST]]:
-// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES8:.*]] [
-// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP6:.*]]
-// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE7:.*]]
-// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP9:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[ATOMIC_CONTINUE:.*]]:
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[X]], align 4
-// CHECK-NEXT: ret void
-// CHECK: [[OPENCL_WORKGROUP]]:
-// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]]
-// CHECK: [[OPENCL_DEVICE]]:
-// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// CHECK: [[OPENCL_ALLSVMDEVICES]]:
-// CHECK-NEXT: [[TMP6:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// CHECK: [[OPENCL_SUBGROUP]]:
-// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// CHECK: [[ATOMIC_SCOPE_CONTINUE]]:
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-// CHECK: [[OPENCL_WORKGROUP1]]:
-// CHECK-NEXT: [[TMP8:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") acquire, align 4
-// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5:.*]]
-// CHECK: [[OPENCL_DEVICE2]]:
-// CHECK-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") acquire, align 4
-// CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]]
-// CHECK: [[OPENCL_ALLSVMDEVICES3]]:
-// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") acquire, align 4
-// CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]]
-// CHECK: [[OPENCL_SUBGROUP4]]:
-// CHECK-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") acquire, align 4
-// CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]]
-// CHECK: [[ATOMIC_SCOPE_CONTINUE5]]:
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-// CHECK: [[OPENCL_WORKGROUP6]]:
-// CHECK-NEXT: [[TMP12:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10:.*]]
-// CHECK: [[OPENCL_DEVICE7]]:
-// CHECK-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]]
-// CHECK: [[OPENCL_ALLSVMDEVICES8]]:
-// CHECK-NEXT: [[TMP14:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]]
-// CHECK: [[OPENCL_SUBGROUP9]]:
-// CHECK-NEXT: [[TMP15:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]]
-// CHECK: [[ATOMIC_SCOPE_CONTINUE10]]:
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-//
void fi6(atomic_int *i, int order, int scope) {
+ // CHECK-LABEL: @fi6
+ // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
+ // CHECK-NEXT: i32 1, label %[[acquire:.*]]
+ // CHECK-NEXT: i32 2, label %[[acquire:.*]]
+ // CHECK-NEXT: i32 5, label %[[seqcst:.*]]
+ // CHECK-NEXT: ]
+ // CHECK: [[monotonic]]:
+ // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
+ // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
+ // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
+ // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
+ // CHECK-NEXT: ]
+ // CHECK: [[acquire]]:
+ // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
+ // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
+ // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
+ // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
+ // CHECK-NEXT: ]
+ // CHECK: [[seqcst]]:
+ // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
+ // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
+ // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
+ // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
+ // CHECK-NEXT: ]
+ // CHECK: [[MON_WG]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
+ // CHECK: [[MON_DEV]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4
+ // CHECK: [[MON_ALL]]:
+ // CHECK: load atomic i32, ptr %{{.*}} monotonic, align 4
+ // CHECK: [[MON_SUB]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
+ // CHECK: [[ACQ_WG]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") acquire, align 4
+ // CHECK: [[ACQ_DEV]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") acquire, align 4
+ // CHECK: [[ACQ_ALL]]:
+ // CHECK: load atomic i32, ptr %{{.*}} acquire, align 4
+ // CHECK: [[ACQ_SUB]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") acquire, align 4
+ // CHECK: [[SEQ_WG]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4
+ // CHECK: [[SEQ_DEV]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4
+ // CHECK: [[SEQ_ALL]]:
+ // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4
+ // CHECK: [[SEQ_SUB]]:
+ // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4
int x = __opencl_atomic_load(i, order, scope);
}
-// CHECK-LABEL: define dso_local float @ff1(
-// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: ret float [[TMP2]]
-//
float ff1(global atomic_float *d) {
+ // CHECK-LABEL: @ff1
+ // CHECK: load atomic i32, ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4
return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local void @ff2(
-// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[D]], ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: store float 1.000000e+00, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup-one-as") release, align 4
-// CHECK-NEXT: ret void
-//
void ff2(atomic_float *d) {
+ // CHECK-LABEL: @ff2
+ // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4
__opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local float @ff3(
-// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[D]], ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: store float 2.000000e+00, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: ret float [[TMP3]]
-//
float ff3(atomic_float *d) {
+ // CHECK-LABEL: @ff3
+ // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local float @ff4(
-// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], float noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: store float [[A]], ptr addrspace(5) [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[A_ADDR]], align 4
-// CHECK-NEXT: store float [[TMP1]], ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], float [[TMP2]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK-NEXT: store float [[TMP3]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: ret float [[TMP4]]
-//
float ff4(global atomic_float *d, float a) {
+ // CHECK-LABEL: @ff4
+ // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local float @ff5(
-// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], double noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: store double [[A]], ptr addrspace(5) [[A_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(5) [[A_ADDR]], align 8
-// CHECK-NEXT: store double [[TMP1]], ptr addrspace(5) [[DOTATOMICTMP]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[DOTATOMICTMP]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], double [[TMP2]] syncscope("workgroup-one-as") monotonic, align 8
-// CHECK-NEXT: store double [[TMP3]], ptr addrspace(5) [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr addrspace(5) [[ATOMIC_TEMP]], align 8
-// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP4]] to float
-// CHECK-NEXT: ret float [[CONV]]
-//
float ff5(global atomic_double *d, double a) {
+ // CHECK-LABEL: @ff5
+ // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local void @atomic_init_foo(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: store i32 42, ptr addrspace(1) @j, align 4
-// CHECK-NEXT: ret void
-//
+// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
+ // CHECK-NOT: atomic
+ // CHECK: store
__opencl_atomic_init(&j, 42);
+ // CHECK-NOT: atomic
+ // CHECK: }
}
-// CHECK-LABEL: define dso_local void @failureOrder(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: store ptr [[PTR]], ptr addrspace(5) [[PTR_ADDR]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr addrspace(5) [[PTR2_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[PTR2_ADDR]], align 8
-// CHECK-NEXT: store i32 43, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") acquire monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[TMP1]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr addrspace(5) [[PTR2_ADDR]], align 8
-// CHECK-NEXT: store i32 43, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
-// CHECK-NEXT: [[CMPXCHG_EXPECTED3:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED4:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP1]], align 4
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg weak ptr [[TMP3]], i32 [[CMPXCHG_EXPECTED3]], i32 [[CMPXCHG_DESIRED4]] syncscope("workgroup") seq_cst acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[TMP4]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL2]], align 1
-// CHECK-NEXT: [[LOADEDV8:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-NEXT: ret void
-//
+// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
+ // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+ // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4
__opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}
-// CHECK-LABEL: define dso_local void @generalFailureOrder(
-// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i32 noundef [[SUCCESS:%.*]], i32 noundef [[FAIL:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[SUCCESS_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[FAIL_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: store ptr [[PTR]], ptr addrspace(5) [[PTR_ADDR]], align 8
-// CHECK-NEXT: store ptr [[PTR2]], ptr addrspace(5) [[PTR2_ADDR]], align 8
-// CHECK-NEXT: store i32 [[SUCCESS]], ptr addrspace(5) [[SUCCESS_ADDR]], align 4
-// CHECK-NEXT: store i32 [[FAIL]], ptr addrspace(5) [[FAIL_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[PTR_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SUCCESS_ADDR]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[PTR2_ADDR]], align 8
-// CHECK-NEXT: store i32 42, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[FAIL_ADDR]], align 4
-// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE]]
-// CHECK-NEXT: i32 3, label %[[RELEASE:.*]]
-// CHECK-NEXT: i32 4, label %[[ACQREL:.*]]
-// CHECK-NEXT: i32 5, label %[[SEQCST:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[MONOTONIC]]:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL]]
-// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[ACQUIRE]]:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED8:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED9:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL10:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL11:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL11]]
-// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL12:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[RELEASE]]:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED24:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED25:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL26:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL27:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL27]]
-// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL28:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[ACQREL]]:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED40:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED41:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL42:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL43:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL43]]
-// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL44:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[SEQCST]]:
-// CHECK-NEXT: [[CMPXCHG_EXPECTED56:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_DESIRED57:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]], align 4
-// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL58:.*]] [
-// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL59:.*]]
-// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL59]]
-// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL60:.*]]
-// CHECK-NEXT: ]
-// CHECK: [[ATOMIC_CONTINUE:.*]]:
-// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]], align 1
-// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP4]] to i1
-// CHECK-NEXT: ret void
-// CHECK: [[MONOTONIC_FAIL]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1:.*]]
-// CHECK: [[ACQUIRE_FAIL]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR2:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV3:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV3]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS4:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR2]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]]
-// CHECK: [[SEQCST_FAIL]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR5:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED]], i32 [[CMPXCHG_DESIRED]] syncscope("workgroup-one-as") monotonic seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV6:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV6]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS7:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR5]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]]
-// CHECK: [[ATOMIC_CONTINUE1]]:
-// CHECK-NEXT: [[CMPXCGH_SUCCESS:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS]], %[[MONOTONIC_FAIL]] ], [ [[CMPXCHG_SUCCESS4]], %[[ACQUIRE_FAIL]] ], [ [[CMPXCHG_SUCCESS7]], %[[SEQCST_FAIL]] ]
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-// CHECK: [[MONOTONIC_FAIL10]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR14:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV15:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV15]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS16:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR14]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13:.*]]
-// CHECK: [[ACQUIRE_FAIL11]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR17:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV18:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV18]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS19:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR17]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13]]
-// CHECK: [[SEQCST_FAIL12]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR20:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED8]], i32 [[CMPXCHG_DESIRED9]] syncscope("workgroup-one-as") acquire seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV21:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV21]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS22:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR20]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE13]]
-// CHECK: [[ATOMIC_CONTINUE13]]:
-// CHECK-NEXT: [[CMPXCGH_SUCCESS23:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS16]], %[[MONOTONIC_FAIL10]] ], [ [[CMPXCHG_SUCCESS19]], %[[ACQUIRE_FAIL11]] ], [ [[CMPXCHG_SUCCESS22]], %[[SEQCST_FAIL12]] ]
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-// CHECK: [[MONOTONIC_FAIL26]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR30:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV31:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV31]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS32:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR30]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29:.*]]
-// CHECK: [[ACQUIRE_FAIL27]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR33:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV34:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV34]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS35:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR33]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29]]
-// CHECK: [[SEQCST_FAIL28]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR36:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED24]], i32 [[CMPXCHG_DESIRED25]] syncscope("workgroup-one-as") release seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV37:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV37]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS38:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR36]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE29]]
-// CHECK: [[ATOMIC_CONTINUE29]]:
-// CHECK-NEXT: [[CMPXCGH_SUCCESS39:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS32]], %[[MONOTONIC_FAIL26]] ], [ [[CMPXCHG_SUCCESS35]], %[[ACQUIRE_FAIL27]] ], [ [[CMPXCHG_SUCCESS38]], %[[SEQCST_FAIL28]] ]
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-// CHECK: [[MONOTONIC_FAIL42]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR46:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV47:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV47]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS48:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR46]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45:.*]]
-// CHECK: [[ACQUIRE_FAIL43]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR49:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV50:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV50]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS51:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR49]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45]]
-// CHECK: [[SEQCST_FAIL44]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR52:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED40]], i32 [[CMPXCHG_DESIRED41]] syncscope("workgroup-one-as") acq_rel seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV53:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV53]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS54:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR52]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE45]]
-// CHECK: [[ATOMIC_CONTINUE45]]:
-// CHECK-NEXT: [[CMPXCGH_SUCCESS55:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS48]], %[[MONOTONIC_FAIL42]] ], [ [[CMPXCHG_SUCCESS51]], %[[ACQUIRE_FAIL43]] ], [ [[CMPXCHG_SUCCESS54]], %[[SEQCST_FAIL44]] ]
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-// CHECK: [[MONOTONIC_FAIL58]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR62:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst monotonic, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV63:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV63]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS64:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR62]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61:.*]]
-// CHECK: [[ACQUIRE_FAIL59]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR65:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst acquire, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV66:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV66]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS67:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR65]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61]]
-// CHECK: [[SEQCST_FAIL60]]:
-// CHECK-NEXT: [[CMPXCHG_PAIR68:%.*]] = cmpxchg ptr [[TMP0]], i32 [[CMPXCHG_EXPECTED56]], i32 [[CMPXCHG_DESIRED57]] syncscope("workgroup") seq_cst seq_cst, align 4
-// CHECK-NEXT: [[CMPXCHG_PREV69:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 0
-// CHECK-NEXT: store i32 [[CMPXCHG_PREV69]], ptr [[TMP2]], align 4
-// CHECK-NEXT: [[CMPXCHG_SUCCESS70:%.*]] = extractvalue { i32, i1 } [[CMPXCHG_PAIR68]], 1
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE61]]
-// CHECK: [[ATOMIC_CONTINUE61]]:
-// CHECK-NEXT: [[CMPXCGH_SUCCESS71:%.*]] = phi i1 [ [[CMPXCHG_SUCCESS64]], %[[MONOTONIC_FAIL58]] ], [ [[CMPXCHG_SUCCESS67]], %[[ACQUIRE_FAIL59]] ], [ [[CMPXCHG_SUCCESS70]], %[[SEQCST_FAIL60]] ]
-// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
-//
+// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+// CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+ // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+ // CHECK: [[MONOTONIC]]
+ // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[ACQUIRE]]
+ // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[RELEASE]]
+ // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[ACQREL]]
+ // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[SEQCST]]
+ // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
+ // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[MONOTONIC_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4
+ // CHECK: br
+
+ // CHECK: [[MONOTONIC_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} monotonic acquire, align 4
+ // CHECK: br
+
+ // CHECK: [[MONOTONIC_SEQCST]]
+ // CHECK: cmpxchg {{.*}} monotonic seq_cst, align 4
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} acquire monotonic, align 4
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} acquire acquire, align 4
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_SEQCST]]
+ // CHECK: cmpxchg {{.*}} acquire seq_cst, align 4
+ // CHECK: br
+
+ // CHECK: [[RELEASE_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} release monotonic, align 4
+ // CHECK: br
+
+ // CHECK: [[RELEASE_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} release acquire, align 4
+ // CHECK: br
+
+ // CHECK: [[RELEASE_SEQCST]]
+ // CHECK: cmpxchg {{.*}} release seq_cst, align 4
+ // CHECK: br
+
+ // CHECK: [[ACQREL_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4
+ // CHECK: br
+
+ // CHECK: [[ACQREL_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4
+ // CHECK: br
+
+ // CHECK: [[ACQREL_SEQCST]]
+ // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align 4
+ // CHECK: br
+
+ // CHECK: [[SEQCST_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4
+ // CHECK: br
+
+ // CHECK: [[SEQCST_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4
+ // CHECK: br
+
+ // CHECK: [[SEQCST_SEQCST]]
+ // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4
+ // CHECK: br
}
-// CHECK-LABEL: define dso_local i32 @test_volatile(
-// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[I_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4
-// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]], align 4
-// CHECK-NEXT: ret i32 [[TMP2]]
-//
int test_volatile(volatile atomic_int *i) {
+ // CHECK-LABEL: @test_volatile
+ // CHECK: %[[i_addr:.*]] = alloca ptr
+ // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+ // CHECK-NEXT: store ptr %i, ptr addrspace(5) %[[i_addr]]
+ // CHECK-NEXT: %[[addr:.*]] = load ptr, ptr addrspace(5) %[[i_addr]]
+ // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] syncscope("workgroup") seq_cst, align 4
+ // CHECK-NEXT: store i32 %[[res]], ptr addrspace(5) %[[atomicdst]]
+ // CHECK-NEXT: %[[retval:.*]] = load i32, ptr addrspace(5) %[[atomicdst]]
+ // CHECK-NEXT: ret i32 %[[retval]]
return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 195f4e215ae94a..89aaf6d1ad83f8 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
// NOTE: NO INCLUDE GUARD DESIRED!
-// FIXME: Redundant with Analysis/TargetLibraryInfo.def
// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index 74d4f33f45fdbb..42d510c17bce39 100644
--- a/llvm/include/llvm/MC/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -456,7 +456,12 @@ class Target {
StringRef TT, StringRef CPU, StringRef Features,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM = std::nullopt,
- CodeGenOptLevel OL = CodeGenOptLevel::Default, bool JIT = false) const;
+ CodeGenOptLevel OL = CodeGenOptLevel::Default, bool JIT = false) const {
+ if (!TargetMachineCtorFn)
+ return nullptr;
+ return TargetMachineCtorFn(*this, Triple(TT), CPU, Features, Options, RM,
+ CM, OL, JIT);
+ }
/// createMCAsmBackend - Create a target specific assembly parser.
MCAsmBackend *createMCAsmBackend(const MCSubtargetInfo &STI,
diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h
index 9c1d3801020a3c..010bc06bb85707 100644
--- a/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/llvm/include/llvm/Support/AtomicOrdering.h
@@ -163,10 +163,10 @@ inline AtomicOrdering fromCABI(AtomicOrderingCABI AO) {
static const AtomicOrdering lookup[8] = {
/* relaxed */ AtomicOrdering::Monotonic,
/* consume */ AtomicOrdering::Acquire,
- /* acquire */ AtomicOrdering::Acquire,
- /* release */ AtomicOrdering::Release,
- /* acq_rel */ AtomicOrdering::AcquireRelease,
- /* acq_seq */ AtomicOrdering::SequentiallyConsistent,
+ /* acquire */ AtomicOrdering::Acquire,
+ /* release */ AtomicOrdering::Release,
+ /* acq_rel */ AtomicOrdering::AcquireRelease,
+ /* acq_seq */ AtomicOrdering::SequentiallyConsistent,
};
return lookup[static_cast<size_t>(AO)];
}
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 9e288b9c84c4a2..b8e56c755fbda8 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -114,8 +114,6 @@ class TargetMachine {
// PGO related tunables.
std::optional<PGOOptions> PGOOption;
- bool IsValid = true;
-
public:
mutable TargetOptions Options;
@@ -123,8 +121,6 @@ class TargetMachine {
void operator=(const TargetMachine &) = delete;
virtual ~TargetMachine();
- bool isValid() const { return IsValid; }
-
const Target &getTarget() const { return TheTarget; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 82f70983c9acb6..4ef7bd859101f2 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -195,8 +195,9 @@ namespace llvm {
/// Emit a call to the __atomic_compare_exchange function.
/// Defined here: https://llvm.org/docs/Atomics.html#libcalls-atomic,
/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#list_of_library_routines
- /// (Different signature than the builtins defined here:
- /// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics)
+///
+ /// NOTE: Signature is different to the builtins defined here:
+ /// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
Value *emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
Value *Desired, Value *SuccessMemorder,
Value *FailureMemorder, IRBuilderBase &B,
@@ -205,7 +206,7 @@ namespace llvm {
/// Variant of __atomic_compare_exchange where \p Size is either 1, 2, 4, 8,
/// or 16.
- Value *emitAtomicCompareExchangeN(int Size, Value *Ptr, Value *Expected,
+ Value *emitAtomicCompareExchangeN(size_t Size, Value *Ptr, Value *Expected,
Value *Desired, Value *SuccessMemorder,
Value *FailureMemorder, IRBuilderBase &B,
const DataLayout &DL,
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e907be02adfc22..e170f4063b44d1 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -204,11 +204,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.disableAllFunctions();
TLI.setAvailable(llvm::LibFunc___kmpc_alloc_shared);
TLI.setAvailable(llvm::LibFunc___kmpc_free_shared);
-
- // FIXME: Some regression tests require this function, even though it is not
- // supported.
- TLI.setAvailable(llvm::LibFunc_atomic_compare_exchange);
-
return;
}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 269f7ff34a8f19..49836b914784fc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
@@ -52,7 +51,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/BuildBuiltins.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
@@ -67,7 +65,6 @@ namespace {
class AtomicExpandImpl {
const TargetLowering *TLI = nullptr;
const DataLayout *DL = nullptr;
- TargetLibraryInfo *TLII = nullptr;
private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
@@ -123,7 +120,7 @@ class AtomicExpandImpl {
CreateCmpXchgInstFun CreateCmpXchg);
public:
- bool run(Function &F, const TargetMachine *TM, TargetLibraryInfo *TLII);
+ bool run(Function &F, const TargetMachine *TM);
};
class AtomicExpandLegacy : public FunctionPass {
@@ -134,8 +131,6 @@ class AtomicExpandLegacy : public FunctionPass {
initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &) const override;
-
bool runOnFunction(Function &F) override;
};
@@ -208,13 +203,11 @@ static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
-bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM,
- TargetLibraryInfo *TLII) {
+bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
const auto *Subtarget = TM->getSubtargetImpl(F);
if (!Subtarget->enableAtomicExpand())
return false;
TLI = Subtarget->getTargetLowering();
- this->TLII = TLII;
DL = &F.getDataLayout();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -356,18 +349,14 @@ bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM,
return MadeChange;
}
-void AtomicExpandLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
bool AtomicExpandLegacy::runOnFunction(Function &F) {
+
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
- auto &&TLIAnalysis = getAnalysis<TargetLibraryInfoWrapperPass>();
auto *TM = &TPC->getTM<TargetMachine>();
AtomicExpandImpl AE;
- return AE.run(F, TM, &TLIAnalysis.getTLI(F));
+ return AE.run(F, TM);
}
FunctionPass *llvm::createAtomicExpandLegacyPass() {
@@ -378,8 +367,7 @@ PreservedAnalyses AtomicExpandPass::run(Function &F,
FunctionAnalysisManager &AM) {
AtomicExpandImpl AE;
- auto &&TLII = AM.getResult<TargetLibraryAnalysis>(F);
- bool Changed = AE.run(F, TM, &TLII);
+ bool Changed = AE.run(F, TM);
if (!Changed)
return PreservedAnalyses::all();
@@ -1724,48 +1712,18 @@ void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
}
void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
- Module *M = I->getModule();
- const DataLayout &DL = M->getDataLayout();
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
unsigned Size = getAtomicOpSize(I);
- LLVMContext &Ctx = I->getContext();
- IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
- Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
- const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
-
- IRBuilder<> Builder(I);
- Value *Ptr = I->getPointerOperand();
- Value *Cmp = I->getCompareOperand();
- Value *Val = I->getNewValOperand();
-
- AllocaInst *ExpectedPtr = AllocaBuilder.CreateAlloca(Cmp->getType(), nullptr,
- "cmpxchg.expected.ptr");
- Builder.CreateStore(Cmp, ExpectedPtr);
-
- AllocaInst *DesiredPtr = AllocaBuilder.CreateAlloca(Val->getType(), nullptr,
- "cmpxchg.desired.ptr");
- Builder.CreateStore(Val, DesiredPtr);
-
- AllocaInst *PrevPtr =
- AllocaBuilder.CreateAlloca(Val->getType(), nullptr, "cmpxchg.prev.ptr");
- Value *SuccessResult = emitAtomicCompareExchangeBuiltin(
- Ptr, ExpectedPtr, DesiredPtr, I->isWeak(), I->isVolatile(),
- I->getSuccessOrdering(), I->getFailureOrdering(), I->getSyncScopeID(),
- PrevPtr, Cmp->getType(), {}, {}, I->getAlign(), Builder, DL, TLII, TLI,
- {}, {},
- /*AllowInstruction=*/false, /*AllowSwitch=*/true,
- /*AllowSizedLibcall=*/true);
-
- // The final result from the CAS is a pair
- // {load of 'expected' alloca, bool result from call}
- Type *FinalResultTy = I->getType();
- Value *V = PoisonValue::get(FinalResultTy);
- Value *ExpectedOut = Builder.CreateAlignedLoad(
- Cmp->getType(), PrevPtr, AllocaAlignment, "cmpxchg.prev.load");
- V = Builder.CreateInsertValue(V, ExpectedOut, 0);
- V = Builder.CreateInsertValue(V, SuccessResult, 1);
- I->replaceAllUsesWith(V);
- I->eraseFromParent();
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
+ I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
+ Libcalls);
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index e5b47e77fdafef..d0dfafeaef561f 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -44,10 +44,6 @@ static cl::opt<bool> EnableNoTrapAfterNoreturn(
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
- if (!MRI) {
- IsValid = false;
- return;
- }
assert(MRI && "Unable to create reg info");
MII.reset(TheTarget.createMCInstrInfo());
assert(MII && "Unable to create instruction info");
diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 459a2167ca1cb5..3be6f1d4634990 100644
--- a/llvm/lib/MC/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -15,26 +15,10 @@
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <vector>
using namespace llvm;
-TargetMachine *Target::createTargetMachine(StringRef TT, StringRef CPU,
- StringRef Features,
- const TargetOptions &Options,
- std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM,
- CodeGenOptLevel OL, bool JIT) const {
- if (!TargetMachineCtorFn)
- return nullptr;
- TargetMachine *Result = TargetMachineCtorFn(*this, Triple(TT), CPU, Features,
- Options, RM, CM, OL, JIT);
- if (!Result->isValid())
- return nullptr;
- return Result;
-}
-
// Clients are responsible for avoid race conditions in registration.
static Target *FirstTarget = nullptr;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 6dd3afb56cef88..1ef891d1b677a2 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -296,6 +296,8 @@ getEffectivePPCCodeModel(const Triple &TT, std::optional<CodeModel::Model> CM,
if (TT.isOSAIX())
return CodeModel::Small;
+ assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based.");
+
if (TT.isArch32Bit())
return CodeModel::Small;
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
index 8bb9ecac7b94e0..19b02a364ac11a 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
@@ -791,17 +791,22 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -817,17 +822,22 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -843,17 +853,22 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -869,17 +884,22 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -895,17 +915,22 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -921,17 +946,22 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1039,17 +1069,22 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1065,17 +1100,22 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1091,17 +1131,22 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1117,17 +1162,22 @@ define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1615,17 +1665,22 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1641,17 +1696,22 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1667,17 +1727,22 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1693,17 +1758,22 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -2191,17 +2261,22 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -2217,17 +2292,22 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -2243,17 +2323,22 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -2269,17 +2354,22 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
index 48590f90f174c5..e56417167c33b0 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
@@ -684,17 +684,22 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -710,17 +715,22 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -736,17 +746,22 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -762,17 +777,22 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -788,17 +808,22 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -814,17 +839,22 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -932,17 +962,22 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -958,17 +993,22 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -984,17 +1024,22 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1010,17 +1055,22 @@ define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
+; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 false, 1
+; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1401,17 +1451,22 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1427,17 +1482,22 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1453,17 +1513,22 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1479,17 +1544,22 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1870,17 +1940,22 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1896,17 +1971,22 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1922,17 +2002,22 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1948,17 +2033,22 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mod
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
+; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
-; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
+; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
+; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
-; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 false, 1
+; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
index 273dab0d13ca99..e70ab325dd8f31 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-fp128.ll
@@ -15,21 +15,23 @@ define fp128 @test_atomicrmw_xchg_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fadd_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_fp128_global_agent(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd fp128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
-; CHECK-NEXT: store fp128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } [[TMP2]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP3]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP3]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast fp128 [[NEW]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP3]], ptr addrspace(5) [[TMP1]], i128 [[TMP4]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP8]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
@@ -40,21 +42,23 @@ define fp128 @test_atomicrmw_fadd_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fsub_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_fp128_global_agent(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub fp128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
-; CHECK-NEXT: store fp128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } [[TMP2]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP3]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP3]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast fp128 [[NEW]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP3]], ptr addrspace(5) [[TMP1]], i128 [[TMP4]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP8]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
@@ -65,21 +69,23 @@ define fp128 @test_atomicrmw_fsub_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fmin_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fmin_fp128_global_agent(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = call fp128 @llvm.minnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
-; CHECK-NEXT: store fp128 [[TMP2]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { fp128, i1 } [[TMP3]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP4]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = call fp128 @llvm.minnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast fp128 [[TMP3]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[TMP5]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP7:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { fp128, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP9]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
@@ -90,21 +96,23 @@ define fp128 @test_atomicrmw_fmin_fp128_global_agent(ptr addrspace(1) %ptr, fp12
define fp128 @test_atomicrmw_fmax_fp128_global_agent(ptr addrspace(1) %ptr, fp128 %value) {
; CHECK-LABEL: @test_atomicrmw_fmax_fp128_global_agent(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca fp128, align 16, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = call fp128 @llvm.maxnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
-; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 16
-; CHECK-NEXT: store fp128 [[TMP2]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 16
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load fp128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { fp128, i1 } [[TMP3]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP4]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi fp128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = call fp128 @llvm.maxnum.f128(fp128 [[LOADED]], fp128 [[VALUE:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store fp128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast fp128 [[TMP3]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[TMP5]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP7:%.*]] = load fp128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { fp128, i1 } poison, fp128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { fp128, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { fp128, i1 } [[TMP9]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { fp128, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret fp128 [[NEWLOADED]]
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
index 5e989c8614a0c6..37ccbd973bdeb6 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll
@@ -73,22 +73,23 @@ define i128 @test_atomicrmw_xor_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_max_i128_global(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -99,22 +100,23 @@ define i128 @test_atomicrmw_max_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_min_i128_global(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sle i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sle i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -125,22 +127,23 @@ define i128 @test_atomicrmw_min_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_umax_i128_global(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -151,22 +154,23 @@ define i128 @test_atomicrmw_umax_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value) {
; CHECK-LABEL: @test_atomicrmw_umin_i128_global(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr addrspace(1) [[PTR:%.*]], align 16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i128 [[LOADED]], i128 [[VALUE]]
-; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: store i128 [[NEW]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 false, 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
+; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i128 [[LOADED]], [[VALUE:%.*]]
+; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i128 [[LOADED]], i128 [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store i128 [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP4]], ptr addrspace(5) [[TMP1]], i128 [[NEW]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i128 [[NEWLOADED]]
@@ -177,16 +181,17 @@ define i128 @test_atomicrmw_umin_i128_global(ptr addrspace(1) %ptr, i128 %value)
define i128 @test_cmpxchg_i128_global(ptr addrspace(1) %out, i128 %in, i128 %old) {
; CHECK-LABEL: @test_cmpxchg_i128_global(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8, addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8, addrspace(5)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i128, ptr addrspace(1) [[OUT:%.*]], i64 4
-; CHECK-NEXT: store i128 [[OLD:%.*]], ptr addrspace(5) [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: store i128 [[IN:%.*]], ptr addrspace(5) [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } [[TMP1]], i1 false, 1
-; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[GEP]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: store i128 [[OLD:%.*]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[TMP2]], ptr addrspace(5) [[TMP1]], i128 [[IN:%.*]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) [[TMP1]])
+; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i128, i1 } [[TMP5]], i1 [[TMP3]], 1
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i128, i1 } [[TMP6]], 0
; CHECK-NEXT: ret i128 [[EXTRACT]]
;
%gep = getelementptr i128, ptr addrspace(1) %out, i64 4
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
index 8f74684497ecfe..97c5a77083f5c2 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
@@ -12,17 +12,22 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -38,17 +43,22 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -152,17 +162,22 @@ define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -178,17 +193,22 @@ define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -204,17 +224,22 @@ define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -230,17 +255,22 @@ define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -326,17 +356,22 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -352,17 +387,22 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -422,17 +462,22 @@ define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -448,17 +493,22 @@ define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -474,17 +524,22 @@ define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -500,17 +555,22 @@ define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 false, 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -596,17 +656,22 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -622,17 +687,22 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -692,17 +762,22 @@ define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -718,17 +793,22 @@ define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -744,17 +824,22 @@ define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -770,17 +855,22 @@ define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -866,17 +956,22 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -892,17 +987,22 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -962,17 +1062,22 @@ define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -988,17 +1093,22 @@ define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1014,17 +1124,22 @@ define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(ptr addrspace(1
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
@@ -1040,17 +1155,22 @@ define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(ptr addrspac
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[CMPXCHG_PREV_PTR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 false, 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
index 70334068e63226..b94023b97a2950 100644
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -30,19 +30,15 @@ define i1 @test_cmpxchg_seq_cst(ptr %addr, i128 %desire, i128 %new) {
;
; PWR7-LABEL: @test_cmpxchg_seq_cst(
; PWR7-NEXT: entry:
-; PWR7-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8
-; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[CMPXCHG_EXPECTED_PTR]], align 8
-; PWR7-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8
-; PWR7-NEXT: store i128 [[NEW:%.*]], ptr [[CMPXCHG_DESIRED_PTR]], align 8
-; PWR7-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8
-; PWR7-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i128, ptr [[CMPXCHG_DESIRED_PTR]], align 8
-; PWR7-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_16:%.*]] = call i8 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[CMPXCHG_EXPECTED_PTR]], i128 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-; PWR7-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_16]], 0
-; PWR7-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 16, i1 false)
-; PWR7-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr [[CMPXCHG_PREV_PTR]], align 8
-; PWR7-NEXT: [[TMP0:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; PWR7-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } [[TMP0]], i1 [[CMPXCHG_SUCCESS]], 1
-; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP1]], 1
+; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 8
+; PWR7-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP0]])
+; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 8
+; PWR7-NEXT: [[TMP1:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[TMP0]], i128 [[NEW:%.*]], i32 5, i32 5)
+; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8
+; PWR7-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP0]])
+; PWR7-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP2]], 0
+; PWR7-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 [[TMP1]], 1
+; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
; PWR7-NEXT: ret i1 [[SUCC]]
;
entry:
diff --git a/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
index 86cea814de5edf..7e41583189c3d3 100644
--- a/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
@@ -3,25 +3,22 @@
define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_f32(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: store float [[LOADED]], ptr [[CMPXCHG_EXPECTED_PTR]], align 4
-; CHECK-NEXT: store float [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 4
-; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[CMPXCHG_DESIRED_PTR]], align 4
-; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 4, i1 false)
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load float, ptr [[CMPXCHG_PREV_PTR]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { float, i1 } poison, float [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { float, i1 } [[TMP2]], i1 [[CMPXCHG_SUCCESS]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP3]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP3]], 0
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP1]])
+; CHECK-NEXT: store float [[LOADED]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[TMP1]], i32 [[TMP5]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP1]])
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { float, i1 } poison, float [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { float, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP9]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret float [[NEWLOADED]]
@@ -32,25 +29,22 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
define float @test_atomicrmw_fsub_f32(ptr %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_f32(
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca float, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: store float [[LOADED]], ptr [[CMPXCHG_EXPECTED_PTR]], align 4
-; CHECK-NEXT: store float [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 4
-; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[CMPXCHG_DESIRED_PTR]], align 4
-; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 5)
-; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 4, i1 false)
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load float, ptr [[CMPXCHG_PREV_PTR]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { float, i1 } poison, float [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { float, i1 } [[TMP2]], i1 [[CMPXCHG_SUCCESS]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP3]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP3]], 0
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP1]])
+; CHECK-NEXT: store float [[LOADED]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[NEW]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange_4(ptr [[PTR]], ptr [[TMP1]], i32 [[TMP5]], i32 5, i32 5)
+; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP1]])
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { float, i1 } poison, float [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { float, i1 } [[TMP8]], i1 [[TMP6]], 1
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { float, i1 } [[TMP9]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { float, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret float [[NEWLOADED]]
diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll b/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
index 33436edb19417f..682c1e6848b313 100644
--- a/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
+++ b/llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=atomic-expand | FileCheck %s
;;; NOTE: this test is actually target-independent -- any target which
@@ -13,65 +12,51 @@ target triple = "sparc-unknown-unknown"
;; First, check the sized calls. Except for cmpxchg, these are fairly
;; straightforward.
+; CHECK-LABEL: @test_load_i16(
+; CHECK: %1 = call i16 @__atomic_load_2(ptr %arg, i32 5)
+; CHECK: ret i16 %1
define i16 @test_load_i16(ptr %arg) {
-; CHECK-LABEL: define i16 @test_load_i16(
-; CHECK-SAME: ptr [[ARG:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @__atomic_load_2(ptr [[ARG]], i32 5)
-; CHECK-NEXT: ret i16 [[TMP1]]
-;
%ret = load atomic i16, ptr %arg seq_cst, align 4
ret i16 %ret
}
+; CHECK-LABEL: @test_store_i16(
+; CHECK: call void @__atomic_store_2(ptr %arg, i16 %val, i32 5)
+; CHECK: ret void
define void @test_store_i16(ptr %arg, i16 %val) {
-; CHECK-LABEL: define void @test_store_i16(
-; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
-; CHECK-NEXT: call void @__atomic_store_2(ptr [[ARG]], i16 [[VAL]], i32 5)
-; CHECK-NEXT: ret void
-;
store atomic i16 %val, ptr %arg seq_cst, align 4
ret void
}
+; CHECK-LABEL: @test_exchange_i16(
+; CHECK: %1 = call i16 @__atomic_exchange_2(ptr %arg, i16 %val, i32 5)
+; CHECK: ret i16 %1
define i16 @test_exchange_i16(ptr %arg, i16 %val) {
-; CHECK-LABEL: define i16 @test_exchange_i16(
-; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @__atomic_exchange_2(ptr [[ARG]], i16 [[VAL]], i32 5)
-; CHECK-NEXT: ret i16 [[TMP1]]
-;
%ret = atomicrmw xchg ptr %arg, i16 %val seq_cst
ret i16 %ret
}
+; CHECK-LABEL: @test_cmpxchg_i16(
+; CHECK: %1 = alloca i16, align 2
+; CHECK: call void @llvm.lifetime.start.p0(i64 2, ptr %1)
+; CHECK: store i16 %old, ptr %1, align 2
+; CHECK: %2 = call zeroext i1 @__atomic_compare_exchange_2(ptr %arg, ptr %1, i16 %new, i32 5, i32 0)
+; CHECK: %3 = load i16, ptr %1, align 2
+; CHECK: call void @llvm.lifetime.end.p0(i64 2, ptr %1)
+; CHECK: %4 = insertvalue { i16, i1 } poison, i16 %3, 0
+; CHECK: %5 = insertvalue { i16, i1 } %4, i1 %2, 1
+; CHECK: %ret = extractvalue { i16, i1 } %5, 0
+; CHECK: ret i16 %ret
define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) {
-; CHECK-LABEL: define i16 @test_cmpxchg_i16(
-; CHECK-SAME: ptr [[ARG:%.*]], i16 [[OLD:%.*]], i16 [[NEW:%.*]]) {
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i16, align 2
-; CHECK-NEXT: store i16 [[OLD]], ptr [[CMPXCHG_EXPECTED_PTR]], align 2
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i16, align 2
-; CHECK-NEXT: store i16 [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 2
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i16, align 2
-; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i16, ptr [[CMPXCHG_DESIRED_PTR]], align 2
-; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_2:%.*]] = call i8 @__atomic_compare_exchange_2(ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], i16 [[CMPXCHG_DESIRED]], i32 5, i32 0)
-; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_2]], 0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 2, i1 false)
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i16, ptr [[CMPXCHG_PREV_PTR]], align 2
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i16, i1 } poison, i16 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i16, i1 } [[TMP1]], i1 [[CMPXCHG_SUCCESS]], 1
-; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP2]], 0
-; CHECK-NEXT: ret i16 [[RET]]
-;
%ret_succ = cmpxchg ptr %arg, i16 %old, i16 %new seq_cst monotonic
%ret = extractvalue { i16, i1 } %ret_succ, 0
ret i16 %ret
}
+; CHECK-LABEL: @test_add_i16(
+; CHECK: %1 = call i16 @__atomic_fetch_add_2(ptr %arg, i16 %val, i32 5)
+; CHECK: ret i16 %1
define i16 @test_add_i16(ptr %arg, i16 %val) {
-; CHECK-LABEL: define i16 @test_add_i16(
-; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @__atomic_fetch_add_2(ptr [[ARG]], i16 [[VAL]], i32 5)
-; CHECK-NEXT: ret i16 [[TMP1]]
-;
%ret = atomicrmw add ptr %arg, i16 %val seq_cst
ret i16 %ret
}
@@ -81,69 +66,62 @@ define i16 @test_add_i16(ptr %arg, i16 %val) {
;; these tests because the "16" suffixed functions aren't available on
;; 32-bit i386.
+; CHECK-LABEL: @test_load_i128(
+; CHECK: %1 = alloca i128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
+; CHECK: call void @__atomic_load(i32 16, ptr %arg, ptr %1, i32 5)
+; CHECK: %2 = load i128, ptr %1, align 8
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
+; CHECK: ret i128 %2
define i128 @test_load_i128(ptr %arg) {
-; CHECK-LABEL: define i128 @test_load_i128(
-; CHECK-SAME: ptr [[ARG:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: call void @__atomic_load(i32 16, ptr [[ARG]], ptr [[TMP1]], i32 5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: ret i128 [[TMP2]]
-;
%ret = load atomic i128, ptr %arg seq_cst, align 16
ret i128 %ret
}
+; CHECK-LABEL: @test_store_i128(
+; CHECK: %1 = alloca i128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
+; CHECK: store i128 %val, ptr %1, align 8
+; CHECK: call void @__atomic_store(i32 16, ptr %arg, ptr %1, i32 5)
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
+; CHECK: ret void
define void @test_store_i128(ptr %arg, i128 %val) {
-; CHECK-LABEL: define void @test_store_i128(
-; CHECK-SAME: ptr [[ARG:%.*]], i128 [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: store i128 [[VAL]], ptr [[TMP1]], align 8
-; CHECK-NEXT: call void @__atomic_store(i32 16, ptr [[ARG]], ptr [[TMP1]], i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: ret void
-;
store atomic i128 %val, ptr %arg seq_cst, align 16
ret void
}
+; CHECK-LABEL: @test_exchange_i128(
+; CHECK: %1 = alloca i128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
+; CHECK: store i128 %val, ptr %1, align 8
+; CHECK: %2 = alloca i128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %2)
+; CHECK: call void @__atomic_exchange(i32 16, ptr %arg, ptr %1, ptr %2, i32 5)
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
+; CHECK: %3 = load i128, ptr %2, align 8
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %2)
+; CHECK: ret i128 %3
define i128 @test_exchange_i128(ptr %arg, i128 %val) {
-; CHECK-LABEL: define i128 @test_exchange_i128(
-; CHECK-SAME: ptr [[ARG:%.*]], i128 [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: store i128 [[VAL]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = alloca i128, align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP2]])
-; CHECK-NEXT: call void @__atomic_exchange(i32 16, ptr [[ARG]], ptr [[TMP1]], ptr [[TMP2]], i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP2]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP2]])
-; CHECK-NEXT: ret i128 [[TMP3]]
-;
%ret = atomicrmw xchg ptr %arg, i128 %val seq_cst
ret i128 %ret
}
+; CHECK-LABEL: @test_cmpxchg_i128(
+; CHECK: %1 = alloca i128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
+; CHECK: store i128 %old, ptr %1, align 8
+; CHECK: %2 = alloca i128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %2)
+; CHECK: store i128 %new, ptr %2, align 8
+; CHECK: %3 = call zeroext i1 @__atomic_compare_exchange(i32 16, ptr %arg, ptr %1, ptr %2, i32 5, i32 0)
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %2)
+; CHECK: %4 = load i128, ptr %1, align 8
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
+; CHECK: %5 = insertvalue { i128, i1 } poison, i128 %4, 0
+; CHECK: %6 = insertvalue { i128, i1 } %5, i1 %3, 1
+; CHECK: %ret = extractvalue { i128, i1 } %6, 0
+; CHECK: ret i128 %ret
define i128 @test_cmpxchg_i128(ptr %arg, i128 %old, i128 %new) {
-; CHECK-LABEL: define i128 @test_cmpxchg_i128(
-; CHECK-SAME: ptr [[ARG:%.*]], i128 [[OLD:%.*]], i128 [[NEW:%.*]]) {
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8
-; CHECK-NEXT: store i128 [[OLD]], ptr [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8
-; CHECK-NEXT: store i128 [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8
-; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 16, ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], ptr [[CMPXCHG_DESIRED_PTR]], i32 5, i32 0)
-; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 16, i1 false)
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } [[TMP1]], i1 [[CMPXCHG_SUCCESS]], 1
-; CHECK-NEXT: [[RET:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
-; CHECK-NEXT: ret i128 [[RET]]
-;
%ret_succ = cmpxchg ptr %arg, i128 %old, i128 %new seq_cst monotonic
%ret = extractvalue { i128, i1 } %ret_succ, 0
ret i128 %ret
@@ -153,77 +131,68 @@ define i128 @test_cmpxchg_i128(ptr %arg, i128 %old, i128 %new) {
; __atomic_fetch_add function, so it needs to expand to a cmpxchg
; loop, which then itself expands into a libcall.
+; CHECK-LABEL: @test_add_i128(
+; CHECK: %1 = alloca i128, align 8
+; CHECK: %2 = alloca i128, align 8
+; CHECK: %3 = load i128, ptr %arg, align 16
+; CHECK: br label %atomicrmw.start
+; CHECK:atomicrmw.start:
+; CHECK: %loaded = phi i128 [ %3, %0 ], [ %newloaded, %atomicrmw.start ]
+; CHECK: %new = add i128 %loaded, %val
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
+; CHECK: store i128 %loaded, ptr %1, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %2)
+; CHECK: store i128 %new, ptr %2, align 8
+; CHECK: %4 = call zeroext i1 @__atomic_compare_exchange(i32 16, ptr %arg, ptr %1, ptr %2, i32 5, i32 5)
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %2)
+; CHECK: %5 = load i128, ptr %1, align 8
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
+; CHECK: %6 = insertvalue { i128, i1 } poison, i128 %5, 0
+; CHECK: %7 = insertvalue { i128, i1 } %6, i1 %4, 1
+; CHECK: %success = extractvalue { i128, i1 } %7, 1
+; CHECK: %newloaded = extractvalue { i128, i1 } %7, 0
+; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start
+; CHECK:atomicrmw.end:
+; CHECK: ret i128 %newloaded
define i128 @test_add_i128(ptr %arg, i128 %val) {
-; CHECK-LABEL: define i128 @test_add_i128(
-; CHECK-SAME: ptr [[ARG:%.*]], i128 [[VAL:%.*]]) {
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca i128, align 8
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca i128, align 8
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca i128, align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[ARG]], align 16
-; CHECK-NEXT: br label %[[ATOMICRMW_START:.*]]
-; CHECK: [[ATOMICRMW_START]]:
-; CHECK-NEXT: [[LOADED:%.*]] = phi i128 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], %[[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = add i128 [[LOADED]], [[VAL]]
-; CHECK-NEXT: store i128 [[LOADED]], ptr [[CMPXCHG_EXPECTED_PTR]], align 8
-; CHECK-NEXT: store i128 [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 8
-; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE:%.*]] = call i8 @__atomic_compare_exchange(i32 16, ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], ptr [[CMPXCHG_DESIRED_PTR]], i32 5, i32 5)
-; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE]], 0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 16, i1 false)
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load i128, ptr [[CMPXCHG_PREV_PTR]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i128, i1 } poison, i128 [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } [[TMP2]], i1 [[CMPXCHG_SUCCESS]], 1
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP3]], 1
-; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP3]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label %[[ATOMICRMW_END:.*]], label %[[ATOMICRMW_START]]
-; CHECK: [[ATOMICRMW_END]]:
-; CHECK-NEXT: ret i128 [[NEWLOADED]]
-;
%ret = atomicrmw add ptr %arg, i128 %val seq_cst
ret i128 %ret
}
;; Ensure that non-integer types get bitcast correctly on the way in and out of a libcall:
+; CHECK-LABEL: @test_load_double(
+; CHECK: %1 = call i64 @__atomic_load_8(ptr %arg, i32 5)
+; CHECK: %2 = bitcast i64 %1 to double
+; CHECK: ret double %2
define double @test_load_double(ptr %arg, double %val) {
-; CHECK-LABEL: define double @test_load_double(
-; CHECK-SAME: ptr [[ARG:%.*]], double [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @__atomic_load_8(ptr [[ARG]], i32 5)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
-; CHECK-NEXT: ret double [[TMP2]]
-;
%1 = load atomic double, ptr %arg seq_cst, align 16
ret double %1
}
+; CHECK-LABEL: @test_store_double(
+; CHECK: %1 = bitcast double %val to i64
+; CHECK: call void @__atomic_store_8(ptr %arg, i64 %1, i32 5)
+; CHECK: ret void
define void @test_store_double(ptr %arg, double %val) {
-; CHECK-LABEL: define void @test_store_double(
-; CHECK-SAME: ptr [[ARG:%.*]], double [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64
-; CHECK-NEXT: call void @__atomic_store_8(ptr [[ARG]], i64 [[TMP1]], i32 5)
-; CHECK-NEXT: ret void
-;
store atomic double %val, ptr %arg seq_cst, align 16
ret void
}
+; CHECK-LABEL: @test_cmpxchg_ptr(
+; CHECK: %1 = alloca ptr, align 4
+; CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %1)
+; CHECK: store ptr %old, ptr %1, align 4
+; CHECK: %2 = ptrtoint ptr %new to i32
+; CHECK: %3 = call zeroext i1 @__atomic_compare_exchange_4(ptr %arg, ptr %1, i32 %2, i32 5, i32 2)
+; CHECK: %4 = load ptr, ptr %1, align 4
+; CHECK: call void @llvm.lifetime.end.p0(i64 4, ptr %1)
+; CHECK: %5 = insertvalue { ptr, i1 } poison, ptr %4, 0
+; CHECK: %6 = insertvalue { ptr, i1 } %5, i1 %3, 1
+; CHECK: %ret = extractvalue { ptr, i1 } %6, 0
+; CHECK: ret ptr %ret
+; CHECK: }
define ptr @test_cmpxchg_ptr(ptr %arg, ptr %old, ptr %new) {
-; CHECK-LABEL: define ptr @test_cmpxchg_ptr(
-; CHECK-SAME: ptr [[ARG:%.*]], ptr [[OLD:%.*]], ptr [[NEW:%.*]]) {
-; CHECK-NEXT: [[CMPXCHG_EXPECTED_PTR:%.*]] = alloca ptr, align 4
-; CHECK-NEXT: store ptr [[OLD]], ptr [[CMPXCHG_EXPECTED_PTR]], align 4
-; CHECK-NEXT: [[CMPXCHG_DESIRED_PTR:%.*]] = alloca ptr, align 4
-; CHECK-NEXT: store ptr [[NEW]], ptr [[CMPXCHG_DESIRED_PTR]], align 4
-; CHECK-NEXT: [[CMPXCHG_PREV_PTR:%.*]] = alloca ptr, align 4
-; CHECK-NEXT: [[CMPXCHG_DESIRED:%.*]] = load i32, ptr [[CMPXCHG_DESIRED_PTR]], align 4
-; CHECK-NEXT: [[__ATOMIC_COMPARE_EXCHANGE_4:%.*]] = call i8 @__atomic_compare_exchange_4(ptr [[ARG]], ptr [[CMPXCHG_EXPECTED_PTR]], i32 [[CMPXCHG_DESIRED]], i32 5, i32 2)
-; CHECK-NEXT: [[CMPXCHG_SUCCESS:%.*]] = icmp eq i8 [[__ATOMIC_COMPARE_EXCHANGE_4]], 0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[CMPXCHG_PREV_PTR]], ptr [[CMPXCHG_EXPECTED_PTR]], i64 4, i1 false)
-; CHECK-NEXT: [[CMPXCHG_PREV_LOAD:%.*]] = load ptr, ptr [[CMPXCHG_PREV_PTR]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr, i1 } poison, ptr [[CMPXCHG_PREV_LOAD]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i1 } [[TMP1]], i1 [[CMPXCHG_SUCCESS]], 1
-; CHECK-NEXT: [[RET:%.*]] = extractvalue { ptr, i1 } [[TMP2]], 0
-; CHECK-NEXT: ret ptr [[RET]]
-;
%ret_succ = cmpxchg ptr %arg, ptr %old, ptr %new seq_cst acquire
%ret = extractvalue { ptr, i1 } %ret_succ, 0
ret ptr %ret
@@ -231,16 +200,14 @@ define ptr @test_cmpxchg_ptr(ptr %arg, ptr %old, ptr %new) {
;; ...and for a non-integer type of large size too.
+; CHECK-LABEL: @test_store_fp128
+; CHECK: %1 = alloca fp128, align 8
+; CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %1)
+; CHECK: store fp128 %val, ptr %1, align 8
+; CHECK: call void @__atomic_store(i32 16, ptr %arg, ptr %1, i32 5)
+; CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %1)
+; CHECK: ret void
define void @test_store_fp128(ptr %arg, fp128 %val) {
-; CHECK-LABEL: define void @test_store_fp128(
-; CHECK-SAME: ptr [[ARG:%.*]], fp128 [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca fp128, align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: store fp128 [[VAL]], ptr [[TMP1]], align 8
-; CHECK-NEXT: call void @__atomic_store(i32 16, ptr [[ARG]], ptr [[TMP1]], i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
-; CHECK-NEXT: ret void
-;
store atomic fp128 %val, ptr %arg seq_cst, align 16
ret void
}
@@ -250,30 +217,16 @@ define void @test_store_fp128(ptr %arg, fp128 %val) {
;; NOTE: atomicrmw and cmpxchg don't yet support an align attribute;
;; when such support is added, they should also be tested here.
+; CHECK-LABEL: @test_unaligned_load_i16(
+; CHECK: __atomic_load(
define i16 @test_unaligned_load_i16(ptr %arg) {
-; CHECK-LABEL: define i16 @test_unaligned_load_i16(
-; CHECK-SAME: ptr [[ARG:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i16, align 2
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[TMP1]])
-; CHECK-NEXT: call void @__atomic_load(i32 2, ptr [[ARG]], ptr [[TMP1]], i32 5)
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[TMP1]])
-; CHECK-NEXT: ret i16 [[TMP2]]
-;
%ret = load atomic i16, ptr %arg seq_cst, align 1
ret i16 %ret
}
+; CHECK-LABEL: @test_unaligned_store_i16(
+; CHECK: __atomic_store(
define void @test_unaligned_store_i16(ptr %arg, i16 %val) {
-; CHECK-LABEL: define void @test_unaligned_store_i16(
-; CHECK-SAME: ptr [[ARG:%.*]], i16 [[VAL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i16, align 2
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[TMP1]])
-; CHECK-NEXT: store i16 [[VAL]], ptr [[TMP1]], align 2
-; CHECK-NEXT: call void @__atomic_store(i32 2, ptr [[ARG]], ptr [[TMP1]], i32 5)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[TMP1]])
-; CHECK-NEXT: ret void
-;
store atomic i16 %val, ptr %arg seq_cst, align 1
ret void
}
>From d5c4afb993e4dc691aa8d8303eeb8b56d4d2331a Mon Sep 17 00:00:00 2001
From: "U-BERGUFFLEN\\meinersbur" <llvm-project at meinersbur.de>
Date: Tue, 5 Nov 2024 16:07:40 +0100
Subject: [PATCH 04/17] merge fixes
---
.../llvm/Analysis/TargetLibraryInfo.def | 1 -
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 2 +-
.../AArch64/Atomics/aarch64-atomicrmw-lse2.ll | 280 +---
.../Atomics/aarch64-atomicrmw-lse2_lse128.ll | 280 +---
.../aarch64-atomicrmw-outline_atomics.ll | 280 +---
.../AArch64/Atomics/aarch64-atomicrmw-rcpc.ll | 280 +---
.../Atomics/aarch64-atomicrmw-rcpc3.ll | 280 +---
.../Atomics/aarch64-atomicrmw-v8_1a.ll | 280 +---
.../AArch64/Atomics/aarch64-atomicrmw-v8a.ll | 280 +---
.../Atomics/aarch64_be-atomicrmw-lse2.ll | 280 +---
.../aarch64_be-atomicrmw-lse2_lse128.ll | 280 +---
.../aarch64_be-atomicrmw-outline_atomics.ll | 280 +---
.../Atomics/aarch64_be-atomicrmw-rcpc.ll | 280 +---
.../Atomics/aarch64_be-atomicrmw-rcpc3.ll | 280 +---
.../Atomics/aarch64_be-atomicrmw-v8_1a.ll | 280 +---
.../Atomics/aarch64_be-atomicrmw-v8a.ll | 280 +---
.../ARM/atomicrmw_exclusive_monitor_ints.ll | 116 +-
llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 88 +-
llvm/test/CodeGen/PowerPC/atomics-i128.ll | 948 ++++++-------
llvm/test/CodeGen/PowerPC/atomics.ll | 27 +-
llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 980 +++-----------
llvm/test/CodeGen/RISCV/atomic-signext.ll | 1184 +++++------------
.../CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll | 22 +-
llvm/test/CodeGen/X86/atomic-xor.ll | 14 +-
llvm/test/CodeGen/X86/atomic64.ll | 78 +-
llvm/test/CodeGen/X86/cmpxchg8b.ll | 8 +-
.../tools/llvm-tli-checker/ps4-tli-check.yaml | 2 +-
27 files changed, 1626 insertions(+), 5764 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 11d3804d7bfeac..0a2a89e2a68de8 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -473,7 +473,6 @@ TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange")
TLI_DEFINE_SIG_INTERNAL(Bool, SizeT, Ptr, Ptr, Ptr, Int, Int)
-
/// bool __atomic_compare_exchange_1(void *obj, void *expected, uint8_t desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_1)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_1")
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index e52874a6aee0d2..5d60c8aa9893e9 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1789,7 +1789,7 @@ Value *llvm::emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
{Size, Ptr, Expected, Desired, SuccessMemorder, FailureMemorder}, B, TLI);
}
-Value *llvm::emitAtomicCompareExchangeN(int Size, Value *Ptr, Value *Expected,
+Value *llvm::emitAtomicCompareExchangeN(size_t Size, Value *Ptr, Value *Expected,
Value *Desired, Value *SuccessMemorder,
Value *FailureMemorder,
IRBuilderBase &B, const DataLayout &DL,
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index 4aca464d7972d6..88061756d8feeb 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -1122,7 +1122,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1135,7 +1134,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1148,7 +1146,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1161,7 +1158,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1174,7 +1170,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,7 +1182,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1200,7 +1194,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1213,7 +1206,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1226,7 +1218,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1239,7 +1230,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1252,7 +1242,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1265,7 +1254,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1278,7 +1266,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1291,7 +1278,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1304,7 +1290,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1318,7 +1303,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1333,7 +1317,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1348,7 +1331,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1363,7 +1345,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1907,7 +1887,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1920,7 +1899,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1933,7 +1911,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1946,7 +1923,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1959,7 +1935,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1972,7 +1947,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1985,7 +1959,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1998,7 +1971,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2011,7 +1983,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2024,7 +1995,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2037,7 +2007,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2050,7 +2019,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2063,7 +2031,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2076,7 +2043,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2089,7 +2055,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2102,7 +2067,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2116,7 +2080,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2130,7 +2093,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2144,7 +2106,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2158,7 +2119,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2697,7 +2657,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2710,7 +2669,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2723,7 +2681,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2736,7 +2693,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2749,7 +2705,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2762,7 +2717,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2775,7 +2729,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2788,7 +2741,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2801,7 +2753,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2814,7 +2765,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2827,7 +2777,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2840,7 +2789,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2853,7 +2801,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2866,7 +2813,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2879,7 +2825,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2893,12 +2838,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2909,12 +2853,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2925,12 +2868,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2941,12 +2883,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2957,12 +2898,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3568,7 +3508,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3583,7 +3522,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3598,7 +3536,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3613,7 +3550,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3628,7 +3564,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3643,7 +3578,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3658,7 +3592,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3673,7 +3606,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3688,7 +3620,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3703,7 +3634,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3718,7 +3648,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3733,7 +3662,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3748,7 +3676,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3763,7 +3690,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3778,7 +3704,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3795,12 +3720,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3815,12 +3739,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3835,12 +3758,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3855,12 +3777,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3875,12 +3796,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4417,7 +4337,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4430,7 +4349,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4443,7 +4361,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4456,7 +4373,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4469,7 +4385,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4482,7 +4397,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4495,7 +4409,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4508,7 +4421,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4521,7 +4433,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4534,7 +4445,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4547,7 +4457,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4560,7 +4469,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4573,7 +4481,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4586,7 +4493,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4599,7 +4505,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4613,12 +4518,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4629,12 +4533,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4645,12 +4548,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4661,12 +4563,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4677,12 +4578,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5217,7 +5117,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5230,7 +5129,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5243,7 +5141,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5256,7 +5153,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5269,7 +5165,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5282,7 +5177,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5295,7 +5189,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5308,7 +5201,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5321,7 +5213,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5334,7 +5225,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5347,7 +5237,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5360,7 +5249,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5373,7 +5261,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5386,7 +5273,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5399,7 +5285,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5413,12 +5298,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5429,12 +5313,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5445,12 +5328,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5461,12 +5343,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5477,12 +5358,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6134,7 +6014,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6151,7 +6030,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6168,7 +6046,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6185,7 +6062,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6202,7 +6078,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6218,7 +6093,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6233,7 +6107,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6248,7 +6121,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6263,7 +6135,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6278,7 +6149,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6293,7 +6163,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6308,7 +6177,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6323,7 +6191,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6338,7 +6205,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6353,7 +6219,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6397,7 +6261,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6420,7 +6283,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6443,7 +6305,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6466,7 +6327,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7124,7 +6984,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7141,7 +7000,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7158,7 +7016,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7175,7 +7032,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7192,7 +7048,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7208,7 +7063,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7223,7 +7077,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7238,7 +7091,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7253,7 +7105,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7268,7 +7119,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7283,7 +7133,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7298,7 +7147,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7313,7 +7161,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7328,7 +7175,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7343,7 +7189,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7364,7 +7209,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7387,7 +7231,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7410,7 +7253,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7433,7 +7275,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7456,7 +7297,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8108,7 +7948,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8124,7 +7963,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8140,7 +7978,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8156,7 +7993,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8172,7 +8008,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8188,7 +8023,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8203,7 +8037,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8218,7 +8051,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8233,7 +8065,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8248,7 +8079,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8263,7 +8093,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8278,7 +8107,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8293,7 +8121,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8308,7 +8135,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8323,7 +8149,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8344,7 +8169,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8367,7 +8191,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8390,7 +8213,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8413,7 +8235,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8436,7 +8257,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -9088,7 +8908,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -9104,7 +8923,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9120,7 +8938,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9136,7 +8953,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9152,7 +8968,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9168,7 +8983,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9183,7 +8997,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9198,7 +9011,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9213,7 +9025,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9228,7 +9039,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9243,7 +9053,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9258,7 +9067,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9273,7 +9081,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9288,7 +9095,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9303,7 +9109,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9324,7 +9129,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9347,7 +9151,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9370,7 +9173,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9393,7 +9195,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9416,7 +9217,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index bcc57e77d693e1..a1712a5ec7a27c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -647,7 +647,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -660,7 +659,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -673,7 +671,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -686,7 +683,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -699,7 +695,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -712,7 +707,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -725,7 +719,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -738,7 +731,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -751,7 +743,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -764,7 +755,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -777,7 +767,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -790,7 +779,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -803,7 +791,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -816,7 +803,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -829,7 +815,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -843,7 +828,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -858,7 +842,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -873,7 +856,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -888,7 +870,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -903,7 +884,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1187,7 +1167,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1200,7 +1179,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1213,7 +1191,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1226,7 +1203,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1239,7 +1215,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1252,7 +1227,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1265,7 +1239,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1278,7 +1251,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1291,7 +1263,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1304,7 +1275,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1317,7 +1287,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1330,7 +1299,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1343,7 +1311,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1356,7 +1323,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1369,7 +1335,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1382,7 +1347,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1396,7 +1360,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1410,7 +1373,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1424,7 +1386,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1438,7 +1399,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1722,7 +1682,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1735,7 +1694,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1748,7 +1706,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1761,7 +1718,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1774,7 +1730,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1787,7 +1742,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1800,7 +1754,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1813,7 +1766,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1826,7 +1778,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1839,7 +1790,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1852,7 +1802,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1865,7 +1814,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1878,7 +1826,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1891,7 +1838,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1904,7 +1850,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1918,12 +1863,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -1934,12 +1878,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -1950,12 +1893,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -1966,12 +1908,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -1982,12 +1923,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2533,7 +2473,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2548,7 +2487,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2563,7 +2501,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2578,7 +2515,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2593,7 +2529,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2608,7 +2543,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2623,7 +2557,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2638,7 +2571,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2653,7 +2585,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2668,7 +2599,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2683,7 +2613,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2698,7 +2627,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2713,7 +2641,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2728,7 +2655,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2743,7 +2669,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2760,12 +2685,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2780,12 +2704,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2800,12 +2723,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2820,12 +2742,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2840,12 +2761,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3082,7 +3002,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3095,7 +3014,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3108,7 +3026,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3121,7 +3038,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3134,7 +3050,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3147,7 +3062,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3160,7 +3074,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3173,7 +3086,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3186,7 +3098,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3199,7 +3110,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3212,7 +3122,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3225,7 +3134,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3238,7 +3146,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3251,7 +3158,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3264,7 +3170,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3278,12 +3183,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3294,12 +3198,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3310,12 +3213,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3326,12 +3228,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3342,12 +3243,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3637,7 +3537,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3650,7 +3549,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3663,7 +3561,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3676,7 +3573,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3689,7 +3585,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3702,7 +3597,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3715,7 +3609,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3728,7 +3621,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3741,7 +3633,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3754,7 +3645,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3767,7 +3657,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3780,7 +3669,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3793,7 +3681,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3806,7 +3693,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3819,7 +3705,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3833,12 +3718,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3849,12 +3733,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3865,12 +3748,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3881,12 +3763,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3897,12 +3778,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4229,7 +4109,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4246,7 +4125,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4263,7 +4141,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4280,7 +4157,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4297,7 +4173,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4313,7 +4188,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4328,7 +4202,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4343,7 +4216,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4358,7 +4230,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4373,7 +4244,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4388,7 +4258,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4403,7 +4272,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4418,7 +4286,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4433,7 +4300,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4448,7 +4314,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4469,7 +4334,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4492,7 +4356,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4515,7 +4378,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4538,7 +4400,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4561,7 +4422,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4894,7 +4754,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4911,7 +4770,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4928,7 +4786,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4945,7 +4802,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4962,7 +4818,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4978,7 +4833,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4993,7 +4847,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5008,7 +4861,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5023,7 +4875,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5038,7 +4889,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5053,7 +4903,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5068,7 +4917,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5083,7 +4931,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5098,7 +4945,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5113,7 +4959,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5134,7 +4979,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5157,7 +5001,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5180,7 +5023,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5203,7 +5045,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5226,7 +5067,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5558,7 +5398,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5574,7 +5413,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5590,7 +5428,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5606,7 +5443,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5622,7 +5458,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5638,7 +5473,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5653,7 +5487,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5668,7 +5501,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5683,7 +5515,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5698,7 +5529,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5713,7 +5543,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5728,7 +5557,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5743,7 +5571,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5758,7 +5585,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5773,7 +5599,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5794,7 +5619,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5817,7 +5641,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5840,7 +5663,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5863,7 +5685,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5886,7 +5707,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6218,7 +6038,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6234,7 +6053,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6250,7 +6068,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6266,7 +6083,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6282,7 +6098,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6298,7 +6113,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6313,7 +6127,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6328,7 +6141,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6343,7 +6155,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6358,7 +6169,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6373,7 +6183,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6388,7 +6197,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6403,7 +6211,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6418,7 +6225,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6433,7 +6239,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6454,7 +6259,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6477,7 +6281,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6500,7 +6303,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6523,7 +6325,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6546,7 +6347,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index 150306ecb73a5c..e9b096e8c6c44b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -662,7 +662,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -675,7 +674,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -688,7 +686,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -701,7 +698,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -714,7 +710,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -727,7 +722,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -740,7 +734,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -753,7 +746,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -766,7 +758,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -779,7 +770,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -792,7 +782,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -805,7 +794,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -818,7 +806,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -831,7 +818,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -844,7 +830,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -858,7 +843,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -873,7 +857,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -888,7 +871,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -903,7 +885,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -918,7 +899,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1292,7 +1272,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1305,7 +1284,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1318,7 +1296,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1331,7 +1308,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1344,7 +1320,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1357,7 +1332,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1370,7 +1344,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1383,7 +1356,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1396,7 +1368,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1409,7 +1380,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1422,7 +1392,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1435,7 +1404,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1448,7 +1416,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1461,7 +1428,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1474,7 +1440,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1487,7 +1452,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1501,7 +1465,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1515,7 +1478,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1529,7 +1491,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1543,7 +1504,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1952,7 +1912,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1965,7 +1924,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1978,7 +1936,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1991,7 +1948,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2004,7 +1960,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2017,7 +1972,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2030,7 +1984,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2043,7 +1996,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2056,7 +2008,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2069,7 +2020,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2082,7 +2032,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2095,7 +2044,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2108,7 +2056,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2121,7 +2068,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2134,7 +2080,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2148,12 +2093,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2164,12 +2108,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2180,12 +2123,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2196,12 +2138,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2212,12 +2153,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2753,7 +2693,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2768,7 +2707,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2783,7 +2721,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2798,7 +2735,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2813,7 +2749,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2828,7 +2763,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2843,7 +2777,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2858,7 +2791,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2873,7 +2805,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2888,7 +2819,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2903,7 +2833,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2918,7 +2847,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2933,7 +2861,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2948,7 +2875,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2963,7 +2889,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2980,12 +2905,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3000,12 +2924,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3020,12 +2943,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3040,12 +2962,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3060,12 +2981,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3347,7 +3267,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3360,7 +3279,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3373,7 +3291,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3386,7 +3303,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3399,7 +3315,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3412,7 +3327,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3425,7 +3339,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3438,7 +3351,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3451,7 +3363,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3464,7 +3375,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3477,7 +3387,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3490,7 +3399,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3503,7 +3411,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3516,7 +3423,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3529,7 +3435,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3543,12 +3448,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3559,12 +3463,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3575,12 +3478,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3591,12 +3493,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3607,12 +3508,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3892,7 +3792,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3905,7 +3804,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3918,7 +3816,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3931,7 +3828,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3944,7 +3840,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3957,7 +3852,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3970,7 +3864,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3983,7 +3876,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3996,7 +3888,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -4009,7 +3900,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -4022,7 +3912,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -4035,7 +3924,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -4048,7 +3936,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -4061,7 +3948,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -4074,7 +3960,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -4088,12 +3973,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4104,12 +3988,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4120,12 +4003,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4136,12 +4018,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4152,12 +4033,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4739,7 +4619,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4756,7 +4635,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4773,7 +4651,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4790,7 +4667,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4807,7 +4683,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4823,7 +4698,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4838,7 +4712,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4853,7 +4726,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4868,7 +4740,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4883,7 +4754,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4898,7 +4768,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4913,7 +4782,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4928,7 +4796,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4943,7 +4810,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4958,7 +4824,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4979,7 +4844,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5002,7 +4866,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5025,7 +4888,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5048,7 +4910,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5071,7 +4932,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5659,7 +5519,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -5676,7 +5535,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -5693,7 +5551,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -5710,7 +5567,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -5727,7 +5583,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -5743,7 +5598,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5758,7 +5612,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5773,7 +5626,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5788,7 +5640,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5803,7 +5654,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5818,7 +5668,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5833,7 +5682,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5848,7 +5696,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5863,7 +5710,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5878,7 +5724,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5899,7 +5744,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5922,7 +5766,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5945,7 +5788,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5968,7 +5810,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5991,7 +5832,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6573,7 +6413,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6589,7 +6428,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6605,7 +6443,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6621,7 +6458,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6637,7 +6473,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6653,7 +6488,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6668,7 +6502,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6683,7 +6516,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6698,7 +6530,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6713,7 +6544,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6728,7 +6558,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6743,7 +6572,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6758,7 +6586,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6773,7 +6600,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6788,7 +6614,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6809,7 +6634,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6832,7 +6656,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6855,7 +6678,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6878,7 +6700,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6901,7 +6722,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7483,7 +7303,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7499,7 +7318,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7515,7 +7333,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7531,7 +7348,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7547,7 +7363,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7563,7 +7378,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7578,7 +7392,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7593,7 +7406,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7608,7 +7420,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7623,7 +7434,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7638,7 +7448,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7653,7 +7462,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7668,7 +7476,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7683,7 +7490,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7698,7 +7504,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7719,7 +7524,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7742,7 +7546,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7765,7 +7568,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7788,7 +7590,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7811,7 +7612,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
index 18da57f7a82c18..4f9e520997a22f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -1122,7 +1122,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1135,7 +1134,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1148,7 +1146,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1161,7 +1158,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1174,7 +1170,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,7 +1182,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1200,7 +1194,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1213,7 +1206,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1226,7 +1218,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1239,7 +1230,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1252,7 +1242,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1265,7 +1254,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1278,7 +1266,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1291,7 +1278,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1304,7 +1290,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1318,7 +1303,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1333,7 +1317,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1348,7 +1331,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1363,7 +1345,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1907,7 +1887,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1920,7 +1899,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1933,7 +1911,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1946,7 +1923,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1959,7 +1935,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1972,7 +1947,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1985,7 +1959,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1998,7 +1971,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2011,7 +1983,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2024,7 +1995,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2037,7 +2007,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2050,7 +2019,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2063,7 +2031,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2076,7 +2043,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2089,7 +2055,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2102,7 +2067,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2116,7 +2080,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2130,7 +2093,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2144,7 +2106,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2158,7 +2119,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2697,7 +2657,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2710,7 +2669,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2723,7 +2681,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2736,7 +2693,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2749,7 +2705,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2762,7 +2717,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2775,7 +2729,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2788,7 +2741,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2801,7 +2753,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2814,7 +2765,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2827,7 +2777,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2840,7 +2789,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2853,7 +2801,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2866,7 +2813,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2879,7 +2825,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2893,12 +2838,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2909,12 +2853,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2925,12 +2868,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2941,12 +2883,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2957,12 +2898,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3568,7 +3508,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3583,7 +3522,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3598,7 +3536,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3613,7 +3550,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3628,7 +3564,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3643,7 +3578,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3658,7 +3592,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3673,7 +3606,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3688,7 +3620,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3703,7 +3634,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3718,7 +3648,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3733,7 +3662,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3748,7 +3676,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3763,7 +3690,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3778,7 +3704,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3795,12 +3720,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3815,12 +3739,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3835,12 +3758,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3855,12 +3777,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3875,12 +3796,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4417,7 +4337,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4430,7 +4349,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4443,7 +4361,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4456,7 +4373,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4469,7 +4385,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4482,7 +4397,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4495,7 +4409,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4508,7 +4421,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4521,7 +4433,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4534,7 +4445,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4547,7 +4457,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4560,7 +4469,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4573,7 +4481,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4586,7 +4493,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4599,7 +4505,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4613,12 +4518,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4629,12 +4533,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4645,12 +4548,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4661,12 +4563,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4677,12 +4578,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5217,7 +5117,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5230,7 +5129,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5243,7 +5141,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5256,7 +5153,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5269,7 +5165,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5282,7 +5177,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5295,7 +5189,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5308,7 +5201,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5321,7 +5213,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5334,7 +5225,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5347,7 +5237,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5360,7 +5249,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5373,7 +5261,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5386,7 +5273,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5399,7 +5285,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5413,12 +5298,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5429,12 +5313,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5445,12 +5328,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5461,12 +5343,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5477,12 +5358,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6134,7 +6014,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6151,7 +6030,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6168,7 +6046,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6185,7 +6062,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6202,7 +6078,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6218,7 +6093,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6233,7 +6107,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6248,7 +6121,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6263,7 +6135,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6278,7 +6149,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6293,7 +6163,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6308,7 +6177,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6323,7 +6191,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6338,7 +6205,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6353,7 +6219,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6397,7 +6261,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6420,7 +6283,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6443,7 +6305,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6466,7 +6327,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7124,7 +6984,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7141,7 +7000,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7158,7 +7016,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7175,7 +7032,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7192,7 +7048,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7208,7 +7063,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7223,7 +7077,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7238,7 +7091,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7253,7 +7105,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7268,7 +7119,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7283,7 +7133,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7298,7 +7147,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7313,7 +7161,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7328,7 +7175,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7343,7 +7189,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7364,7 +7209,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7387,7 +7231,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7410,7 +7253,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7433,7 +7275,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7456,7 +7297,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8108,7 +7948,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8124,7 +7963,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8140,7 +7978,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8156,7 +7993,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8172,7 +8008,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8188,7 +8023,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8203,7 +8037,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8218,7 +8051,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8233,7 +8065,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8248,7 +8079,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8263,7 +8093,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8278,7 +8107,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8293,7 +8121,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8308,7 +8135,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8323,7 +8149,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8344,7 +8169,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8367,7 +8191,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8390,7 +8213,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8413,7 +8235,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8436,7 +8257,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -9088,7 +8908,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -9104,7 +8923,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9120,7 +8938,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9136,7 +8953,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9152,7 +8968,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9168,7 +8983,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9183,7 +8997,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9198,7 +9011,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9213,7 +9025,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9228,7 +9039,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9243,7 +9053,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9258,7 +9067,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9273,7 +9081,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9288,7 +9095,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9303,7 +9109,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9324,7 +9129,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9347,7 +9151,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9370,7 +9173,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9393,7 +9195,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9416,7 +9217,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 9e6941a723f728..3437ccc8be40d7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -1122,7 +1122,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1135,7 +1134,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1148,7 +1146,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1161,7 +1158,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1174,7 +1170,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,7 +1182,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1200,7 +1194,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1213,7 +1206,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1226,7 +1218,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1239,7 +1230,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1252,7 +1242,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1265,7 +1254,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1278,7 +1266,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1291,7 +1278,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1304,7 +1290,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1318,7 +1303,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1333,7 +1317,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1348,7 +1331,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1363,7 +1345,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1907,7 +1887,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1920,7 +1899,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1933,7 +1911,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1946,7 +1923,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1959,7 +1935,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1972,7 +1947,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1985,7 +1959,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1998,7 +1971,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2011,7 +1983,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2024,7 +1995,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2037,7 +2007,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2050,7 +2019,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2063,7 +2031,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2076,7 +2043,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2089,7 +2055,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2102,7 +2067,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2116,7 +2080,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2130,7 +2093,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2144,7 +2106,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2158,7 +2119,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2697,7 +2657,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2710,7 +2669,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2723,7 +2681,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2736,7 +2693,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2749,7 +2705,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2762,7 +2717,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2775,7 +2729,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2788,7 +2741,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2801,7 +2753,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2814,7 +2765,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2827,7 +2777,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2840,7 +2789,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2853,7 +2801,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2866,7 +2813,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2879,7 +2825,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2893,12 +2838,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2909,12 +2853,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2925,12 +2868,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2941,12 +2883,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2957,12 +2898,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3568,7 +3508,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3583,7 +3522,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3598,7 +3536,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3613,7 +3550,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3628,7 +3564,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3643,7 +3578,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3658,7 +3592,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3673,7 +3606,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3688,7 +3620,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3703,7 +3634,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3718,7 +3648,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3733,7 +3662,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3748,7 +3676,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3763,7 +3690,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3778,7 +3704,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3795,12 +3720,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3815,12 +3739,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3835,12 +3758,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3855,12 +3777,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3875,12 +3796,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4417,7 +4337,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4430,7 +4349,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4443,7 +4361,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4456,7 +4373,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4469,7 +4385,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4482,7 +4397,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4495,7 +4409,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4508,7 +4421,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4521,7 +4433,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4534,7 +4445,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4547,7 +4457,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4560,7 +4469,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4573,7 +4481,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4586,7 +4493,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4599,7 +4505,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4613,12 +4518,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4629,12 +4533,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4645,12 +4548,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4661,12 +4563,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4677,12 +4578,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5217,7 +5117,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5230,7 +5129,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5243,7 +5141,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5256,7 +5153,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5269,7 +5165,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5282,7 +5177,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5295,7 +5189,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5308,7 +5201,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5321,7 +5213,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5334,7 +5225,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5347,7 +5237,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5360,7 +5249,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5373,7 +5261,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5386,7 +5273,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5399,7 +5285,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5413,12 +5298,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5429,12 +5313,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5445,12 +5328,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5461,12 +5343,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5477,12 +5358,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6134,7 +6014,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6151,7 +6030,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6168,7 +6046,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6185,7 +6062,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6202,7 +6078,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6218,7 +6093,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6233,7 +6107,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6248,7 +6121,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6263,7 +6135,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6278,7 +6149,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6293,7 +6163,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6308,7 +6177,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6323,7 +6191,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6338,7 +6205,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6353,7 +6219,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6397,7 +6261,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6420,7 +6283,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6443,7 +6305,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6466,7 +6327,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7124,7 +6984,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7141,7 +7000,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7158,7 +7016,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7175,7 +7032,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7192,7 +7048,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7208,7 +7063,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7223,7 +7077,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7238,7 +7091,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7253,7 +7105,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7268,7 +7119,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7283,7 +7133,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7298,7 +7147,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7313,7 +7161,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7328,7 +7175,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7343,7 +7189,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7364,7 +7209,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7387,7 +7231,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7410,7 +7253,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7433,7 +7275,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7456,7 +7297,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8108,7 +7948,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8124,7 +7963,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8140,7 +7978,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8156,7 +7993,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8172,7 +8008,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8188,7 +8023,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8203,7 +8037,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8218,7 +8051,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8233,7 +8065,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8248,7 +8079,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8263,7 +8093,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8278,7 +8107,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8293,7 +8121,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8308,7 +8135,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8323,7 +8149,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8344,7 +8169,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8367,7 +8191,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8390,7 +8213,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8413,7 +8235,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8436,7 +8257,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -9088,7 +8908,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -9104,7 +8923,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9120,7 +8938,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9136,7 +8953,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9152,7 +8968,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9168,7 +8983,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9183,7 +8997,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9198,7 +9011,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9213,7 +9025,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9228,7 +9039,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9243,7 +9053,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9258,7 +9067,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9273,7 +9081,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9288,7 +9095,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9303,7 +9109,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9324,7 +9129,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9347,7 +9151,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9370,7 +9173,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9393,7 +9195,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9416,7 +9217,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index b650f8c92df057..ee5fbe39b4492c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -682,7 +682,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -695,7 +694,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -708,7 +706,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -721,7 +718,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -734,7 +730,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -747,7 +742,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -760,7 +754,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -773,7 +766,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -786,7 +778,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -799,7 +790,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -812,7 +802,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -825,7 +814,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -838,7 +826,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -851,7 +838,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -864,7 +850,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -878,7 +863,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -893,7 +877,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -908,7 +891,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -923,7 +905,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -938,7 +919,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1222,7 +1202,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1235,7 +1214,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1248,7 +1226,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1261,7 +1238,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1274,7 +1250,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1287,7 +1262,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1300,7 +1274,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1313,7 +1286,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1326,7 +1298,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1339,7 +1310,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1352,7 +1322,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1365,7 +1334,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1378,7 +1346,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1391,7 +1358,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1404,7 +1370,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1417,7 +1382,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1431,7 +1395,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1445,7 +1408,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1459,7 +1421,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1473,7 +1434,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1792,7 +1752,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1805,7 +1764,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1818,7 +1776,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1831,7 +1788,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1844,7 +1800,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1857,7 +1812,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1870,7 +1824,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1883,7 +1836,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1896,7 +1848,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1909,7 +1860,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1922,7 +1872,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1935,7 +1884,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1948,7 +1896,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1961,7 +1908,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1974,7 +1920,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1988,12 +1933,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2004,12 +1948,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2020,12 +1963,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2036,12 +1978,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2052,12 +1993,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2603,7 +2543,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2618,7 +2557,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2633,7 +2571,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2648,7 +2585,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2663,7 +2599,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2678,7 +2613,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2693,7 +2627,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2708,7 +2641,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2723,7 +2655,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2738,7 +2669,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2753,7 +2683,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2768,7 +2697,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2783,7 +2711,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2798,7 +2725,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2813,7 +2739,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2830,12 +2755,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2850,12 +2774,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2870,12 +2793,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2890,12 +2812,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2910,12 +2831,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3207,7 +3127,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3220,7 +3139,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3233,7 +3151,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3246,7 +3163,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3259,7 +3175,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3272,7 +3187,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3285,7 +3199,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3298,7 +3211,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3311,7 +3223,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3324,7 +3235,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3337,7 +3247,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3350,7 +3259,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3363,7 +3271,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3376,7 +3283,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3389,7 +3295,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3403,12 +3308,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3419,12 +3323,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3435,12 +3338,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3451,12 +3353,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3467,12 +3368,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3762,7 +3662,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3775,7 +3674,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3788,7 +3686,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3801,7 +3698,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3814,7 +3710,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3827,7 +3722,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3840,7 +3734,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3853,7 +3746,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3866,7 +3758,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3879,7 +3770,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3892,7 +3782,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3905,7 +3794,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3918,7 +3806,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3931,7 +3818,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3944,7 +3830,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3958,12 +3843,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3974,12 +3858,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3990,12 +3873,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4006,12 +3888,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4022,12 +3903,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4354,7 +4234,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4371,7 +4250,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4388,7 +4266,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4405,7 +4282,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4422,7 +4298,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4438,7 +4313,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4453,7 +4327,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4468,7 +4341,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4483,7 +4355,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4498,7 +4369,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4513,7 +4383,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4528,7 +4397,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4543,7 +4411,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4558,7 +4425,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4573,7 +4439,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4594,7 +4459,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4617,7 +4481,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4640,7 +4503,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4663,7 +4525,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4686,7 +4547,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5019,7 +4879,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -5036,7 +4895,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -5053,7 +4911,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -5070,7 +4927,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -5087,7 +4943,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -5103,7 +4958,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5118,7 +4972,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5133,7 +4986,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5148,7 +5000,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5163,7 +5014,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5178,7 +5028,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5193,7 +5042,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5208,7 +5056,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5223,7 +5070,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5238,7 +5084,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5259,7 +5104,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5282,7 +5126,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5305,7 +5148,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5328,7 +5170,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5351,7 +5192,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5683,7 +5523,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5699,7 +5538,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5715,7 +5553,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5731,7 +5568,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5747,7 +5583,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5763,7 +5598,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5778,7 +5612,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5793,7 +5626,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5808,7 +5640,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5823,7 +5654,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5838,7 +5668,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5853,7 +5682,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5868,7 +5696,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5883,7 +5710,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5898,7 +5724,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5919,7 +5744,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5942,7 +5766,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5965,7 +5788,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5988,7 +5810,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6011,7 +5832,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6343,7 +6163,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6359,7 +6178,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6375,7 +6193,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6391,7 +6208,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6407,7 +6223,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6423,7 +6238,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6438,7 +6252,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6453,7 +6266,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6468,7 +6280,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6483,7 +6294,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6498,7 +6308,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6513,7 +6322,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6528,7 +6336,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6543,7 +6350,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6558,7 +6364,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6579,7 +6384,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6602,7 +6406,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6625,7 +6428,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6648,7 +6450,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6671,7 +6472,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
index 866ee991f285e7..2473147509dc87 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
@@ -1122,7 +1122,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1135,7 +1134,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1148,7 +1146,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1161,7 +1158,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1174,7 +1170,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w8, w9, uxth
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1187,7 +1182,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1200,7 +1194,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1213,7 +1206,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1226,7 +1218,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1239,7 +1230,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1252,7 +1242,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1265,7 +1254,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1278,7 +1266,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1291,7 +1278,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1304,7 +1290,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1318,7 +1303,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1333,7 +1317,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1348,7 +1331,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1363,7 +1345,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: adds x9, x8, x9
; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1907,7 +1887,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1920,7 +1899,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1933,7 +1911,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1946,7 +1923,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1959,7 +1935,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1972,7 +1947,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1985,7 +1959,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1998,7 +1971,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2011,7 +1983,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2024,7 +1995,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2037,7 +2007,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2050,7 +2019,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2063,7 +2031,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2076,7 +2043,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2089,7 +2055,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2102,7 +2067,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2116,7 +2080,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2130,7 +2093,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2144,7 +2106,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2158,7 +2119,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x8, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2697,7 +2657,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2710,7 +2669,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2723,7 +2681,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2736,7 +2693,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2749,7 +2705,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2762,7 +2717,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2775,7 +2729,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2788,7 +2741,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2801,7 +2753,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2814,7 +2765,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2827,7 +2777,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2840,7 +2789,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2853,7 +2801,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2866,7 +2813,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2879,7 +2825,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2893,12 +2838,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2909,12 +2853,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2925,12 +2868,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2941,12 +2883,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2957,12 +2898,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x9, x8, x9
; -O0: and x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3568,7 +3508,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3583,7 +3522,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3598,7 +3536,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3613,7 +3550,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3628,7 +3564,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3643,7 +3578,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3658,7 +3592,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3673,7 +3606,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3688,7 +3620,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3703,7 +3634,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3718,7 +3648,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3733,7 +3662,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3748,7 +3676,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3763,7 +3690,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3778,7 +3704,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3795,12 +3720,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3815,12 +3739,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3835,12 +3758,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3855,12 +3777,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3875,12 +3796,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x9, x9
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4417,7 +4337,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4430,7 +4349,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4443,7 +4361,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4456,7 +4373,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4469,7 +4385,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4482,7 +4397,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4495,7 +4409,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4508,7 +4421,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4521,7 +4433,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4534,7 +4445,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4547,7 +4457,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4560,7 +4469,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4573,7 +4481,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4586,7 +4493,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4599,7 +4505,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4613,12 +4518,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4629,12 +4533,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4645,12 +4548,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4661,12 +4563,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4677,12 +4578,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x9, x8, x9
; -O0: orr x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5217,7 +5117,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5230,7 +5129,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5243,7 +5141,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5256,7 +5153,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5269,7 +5165,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5282,7 +5177,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5295,7 +5189,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5308,7 +5201,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5321,7 +5213,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5334,7 +5225,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5347,7 +5237,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5360,7 +5249,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5373,7 +5261,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5386,7 +5273,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5399,7 +5285,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5413,12 +5298,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5429,12 +5313,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5445,12 +5328,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5461,12 +5343,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5477,12 +5358,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x9, x8, x9
; -O0: eor x8, x8, x10
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6134,7 +6014,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6151,7 +6030,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6168,7 +6046,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6185,7 +6062,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6202,7 +6078,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6218,7 +6093,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6233,7 +6107,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6248,7 +6121,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6263,7 +6135,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6278,7 +6149,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6293,7 +6163,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6308,7 +6177,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6323,7 +6191,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6338,7 +6205,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6353,7 +6219,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6397,7 +6261,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6420,7 +6283,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6443,7 +6305,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6466,7 +6327,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7124,7 +6984,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7141,7 +7000,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7158,7 +7016,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7175,7 +7032,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7192,7 +7048,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7208,7 +7063,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7223,7 +7077,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7238,7 +7091,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7253,7 +7105,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7268,7 +7119,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7283,7 +7133,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7298,7 +7147,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7313,7 +7161,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7328,7 +7175,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7343,7 +7189,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7364,7 +7209,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7387,7 +7231,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7410,7 +7253,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7433,7 +7275,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7456,7 +7297,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8108,7 +7948,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8124,7 +7963,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8140,7 +7978,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8156,7 +7993,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8172,7 +8008,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8188,7 +8023,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8203,7 +8037,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8218,7 +8051,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8233,7 +8065,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8248,7 +8079,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8263,7 +8093,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8278,7 +8107,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8293,7 +8121,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8308,7 +8135,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8323,7 +8149,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8344,7 +8169,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8367,7 +8191,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8390,7 +8213,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8413,7 +8235,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8436,7 +8257,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -9088,7 +8908,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -9104,7 +8923,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9120,7 +8938,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9136,7 +8953,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9152,7 +8968,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9168,7 +8983,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9183,7 +8997,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9198,7 +9011,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9213,7 +9025,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9228,7 +9039,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9243,7 +9053,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9258,7 +9067,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9273,7 +9081,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9288,7 +9095,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9303,7 +9109,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9324,7 +9129,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9347,7 +9151,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9370,7 +9173,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9393,7 +9195,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9416,7 +9217,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: ands w11, w11, #0x1
; -O0: csel x8, x8, x10, ne
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
index c50b534d864a9c..01317e09028c35 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
@@ -1127,7 +1127,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1140,7 +1139,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1153,7 +1151,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1166,7 +1163,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1179,7 +1175,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1192,7 +1187,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1205,7 +1199,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1218,7 +1211,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1231,7 +1223,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1244,7 +1235,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1257,7 +1247,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1270,7 +1259,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1283,7 +1271,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1296,7 +1283,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1309,7 +1295,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1322,7 +1307,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1336,7 +1320,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1350,7 +1333,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1364,7 +1346,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1912,7 +1892,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1925,7 +1904,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1938,7 +1916,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1951,7 +1928,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1964,7 +1940,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1977,7 +1952,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1990,7 +1964,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -2003,7 +1976,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2016,7 +1988,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2029,7 +2000,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2042,7 +2012,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2055,7 +2024,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2068,7 +2036,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2081,7 +2048,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2094,7 +2060,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2107,7 +2072,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2121,7 +2085,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2135,7 +2098,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2149,7 +2111,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2163,7 +2124,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2707,7 +2667,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2720,7 +2679,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2733,7 +2691,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2746,7 +2703,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2759,7 +2715,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2772,7 +2727,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2785,7 +2739,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2798,7 +2751,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2811,7 +2763,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2824,7 +2775,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2837,7 +2787,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2850,7 +2799,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2863,7 +2811,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2876,7 +2823,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2889,7 +2835,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2903,12 +2848,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2919,12 +2863,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2935,12 +2878,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2951,12 +2893,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2967,12 +2908,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3583,7 +3523,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3598,7 +3537,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3613,7 +3551,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3628,7 +3565,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3643,7 +3579,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3658,7 +3593,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3673,7 +3607,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3688,7 +3621,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3703,7 +3635,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3718,7 +3649,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3733,7 +3663,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3748,7 +3677,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3763,7 +3691,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3778,7 +3705,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3793,7 +3719,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3810,12 +3735,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3830,12 +3754,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3850,12 +3773,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3870,12 +3792,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3890,12 +3811,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4437,7 +4357,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4450,7 +4369,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4463,7 +4381,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4476,7 +4393,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4489,7 +4405,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4502,7 +4417,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4515,7 +4429,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4528,7 +4441,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4541,7 +4453,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4554,7 +4465,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4567,7 +4477,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4580,7 +4489,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4593,7 +4501,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4606,7 +4513,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4619,7 +4525,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4633,12 +4538,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4649,12 +4553,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4665,12 +4568,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4681,12 +4583,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4697,12 +4598,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5242,7 +5142,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5255,7 +5154,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5268,7 +5166,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5281,7 +5178,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5294,7 +5190,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5307,7 +5202,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5320,7 +5214,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5333,7 +5226,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5346,7 +5238,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5359,7 +5250,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5372,7 +5262,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5385,7 +5274,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5398,7 +5286,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5411,7 +5298,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5424,7 +5310,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5438,12 +5323,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5454,12 +5338,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5470,12 +5353,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5486,12 +5368,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5502,12 +5383,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6139,7 +6019,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6156,7 +6035,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6173,7 +6051,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6190,7 +6067,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6207,7 +6083,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6223,7 +6098,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6238,7 +6112,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6253,7 +6126,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6268,7 +6140,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6283,7 +6154,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6298,7 +6168,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6313,7 +6182,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6328,7 +6196,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6343,7 +6210,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6358,7 +6224,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6392,7 +6256,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6410,7 +6273,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6428,7 +6290,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6446,7 +6307,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7084,7 +6944,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7101,7 +6960,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7118,7 +6976,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7135,7 +6992,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7152,7 +7008,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7168,7 +7023,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7183,7 +7037,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7198,7 +7051,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7213,7 +7065,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7228,7 +7079,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7243,7 +7093,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7258,7 +7107,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7273,7 +7121,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7288,7 +7135,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7303,7 +7149,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7319,7 +7164,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7337,7 +7181,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7355,7 +7198,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7373,7 +7215,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7391,7 +7232,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8029,7 +7869,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8046,7 +7885,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8063,7 +7901,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8080,7 +7917,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8097,7 +7933,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8113,7 +7948,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8128,7 +7962,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8143,7 +7976,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8158,7 +7990,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8173,7 +8004,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8188,7 +8018,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8203,7 +8032,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8218,7 +8046,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8233,7 +8060,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8248,7 +8074,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8264,7 +8089,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8282,7 +8106,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8300,7 +8123,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8318,7 +8140,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8336,7 +8157,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8974,7 +8794,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8991,7 +8810,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9008,7 +8826,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9025,7 +8842,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9042,7 +8858,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9058,7 +8873,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9073,7 +8887,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9088,7 +8901,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9103,7 +8915,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9118,7 +8929,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9133,7 +8943,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9148,7 +8957,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9163,7 +8971,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9178,7 +8985,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9193,7 +8999,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9209,7 +9014,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9227,7 +9031,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9245,7 +9048,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9263,7 +9065,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9281,7 +9082,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
index 6e647e34927c03..83e383f335637c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -632,7 +632,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -645,7 +644,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -658,7 +656,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -671,7 +668,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -684,7 +680,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -697,7 +692,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -710,7 +704,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -723,7 +716,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -736,7 +728,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -749,7 +740,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -762,7 +752,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -775,7 +764,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -788,7 +776,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -801,7 +788,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -814,7 +800,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -827,7 +812,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -841,7 +825,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -855,7 +838,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -869,7 +851,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -883,7 +864,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1257,7 +1237,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1270,7 +1249,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1283,7 +1261,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1296,7 +1273,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1309,7 +1285,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1322,7 +1297,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1335,7 +1309,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1348,7 +1321,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1361,7 +1333,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1374,7 +1345,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1387,7 +1357,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1400,7 +1369,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1413,7 +1381,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1426,7 +1393,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1439,7 +1405,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1452,7 +1417,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1466,7 +1430,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1480,7 +1443,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1494,7 +1456,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1508,7 +1469,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1792,7 +1752,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1805,7 +1764,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1818,7 +1776,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1831,7 +1788,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1844,7 +1800,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1857,7 +1812,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1870,7 +1824,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1883,7 +1836,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1896,7 +1848,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1909,7 +1860,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1922,7 +1872,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1935,7 +1884,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1948,7 +1896,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -1961,7 +1908,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -1974,7 +1920,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -1988,12 +1933,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2004,12 +1948,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2020,12 +1963,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2036,12 +1978,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2052,12 +1993,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2608,7 +2548,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2623,7 +2562,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2638,7 +2576,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2653,7 +2590,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2668,7 +2604,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2683,7 +2618,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2698,7 +2632,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2713,7 +2646,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2728,7 +2660,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2743,7 +2674,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2758,7 +2688,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2773,7 +2702,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2788,7 +2716,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2803,7 +2730,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2818,7 +2744,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2835,12 +2760,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2855,12 +2779,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2875,12 +2798,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2895,12 +2817,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2915,12 +2836,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3157,7 +3077,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3170,7 +3089,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3183,7 +3101,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3196,7 +3113,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3209,7 +3125,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3222,7 +3137,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3235,7 +3149,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3248,7 +3161,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3261,7 +3173,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3274,7 +3185,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3287,7 +3197,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3300,7 +3209,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3313,7 +3221,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3326,7 +3233,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3339,7 +3245,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3353,12 +3258,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3369,12 +3273,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3385,12 +3288,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3401,12 +3303,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3417,12 +3318,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3702,7 +3602,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3715,7 +3614,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3728,7 +3626,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3741,7 +3638,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3754,7 +3650,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3767,7 +3662,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3780,7 +3674,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3793,7 +3686,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3806,7 +3698,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3819,7 +3710,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3832,7 +3722,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3845,7 +3734,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3858,7 +3746,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3871,7 +3758,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3884,7 +3770,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3898,12 +3783,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3914,12 +3798,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3930,12 +3813,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3946,12 +3828,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3962,12 +3843,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4259,7 +4139,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4276,7 +4155,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4293,7 +4171,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4310,7 +4187,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4327,7 +4203,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4343,7 +4218,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4358,7 +4232,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4373,7 +4246,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4388,7 +4260,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4403,7 +4274,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4418,7 +4288,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4433,7 +4302,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4448,7 +4316,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4463,7 +4330,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4478,7 +4344,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4494,7 +4359,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4512,7 +4376,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4530,7 +4393,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4548,7 +4410,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4566,7 +4427,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4864,7 +4724,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4881,7 +4740,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4898,7 +4756,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4915,7 +4772,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4932,7 +4788,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4948,7 +4803,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4963,7 +4817,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4978,7 +4831,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4993,7 +4845,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5008,7 +4859,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5023,7 +4873,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5038,7 +4887,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5053,7 +4901,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5068,7 +4915,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5083,7 +4929,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5099,7 +4944,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5117,7 +4961,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5135,7 +4978,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5153,7 +4995,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5171,7 +5012,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5469,7 +5309,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5486,7 +5325,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5503,7 +5341,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5520,7 +5357,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5537,7 +5373,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5553,7 +5388,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5568,7 +5402,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5583,7 +5416,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5598,7 +5430,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5613,7 +5444,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5628,7 +5458,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5643,7 +5472,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5658,7 +5486,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5673,7 +5500,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5688,7 +5514,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5704,7 +5529,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5722,7 +5546,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5740,7 +5563,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5758,7 +5580,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5776,7 +5597,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6074,7 +5894,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6091,7 +5910,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6108,7 +5926,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6125,7 +5942,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6142,7 +5958,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6158,7 +5973,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6173,7 +5987,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6188,7 +6001,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6203,7 +6015,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6218,7 +6029,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6233,7 +6043,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6248,7 +6057,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6263,7 +6071,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6278,7 +6085,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6293,7 +6099,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6309,7 +6114,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6327,7 +6131,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6345,7 +6148,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6363,7 +6165,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6381,7 +6182,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
index 4453de1d0e61f8..f9c1a2216dc2c1 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
@@ -637,7 +637,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -650,7 +649,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -663,7 +661,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -676,7 +673,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -689,7 +685,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -702,7 +697,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -715,7 +709,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -728,7 +721,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -741,7 +733,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -754,7 +745,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -767,7 +757,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -780,7 +769,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -793,7 +781,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -806,7 +793,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -819,7 +805,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -832,7 +817,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -846,7 +830,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -860,7 +843,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -874,7 +856,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -888,7 +869,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1252,7 +1232,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1265,7 +1244,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1278,7 +1256,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1291,7 +1268,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1304,7 +1280,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1317,7 +1292,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1330,7 +1304,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1343,7 +1316,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1356,7 +1328,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1369,7 +1340,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1382,7 +1352,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1395,7 +1364,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1408,7 +1376,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1421,7 +1388,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1434,7 +1400,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1447,7 +1412,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1461,7 +1425,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1475,7 +1438,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1489,7 +1451,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1503,7 +1464,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1902,7 +1862,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1915,7 +1874,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1928,7 +1886,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1941,7 +1898,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1954,7 +1910,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1967,7 +1922,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1980,7 +1934,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1993,7 +1946,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2006,7 +1958,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2019,7 +1970,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2032,7 +1982,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2045,7 +1994,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2058,7 +2006,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2071,7 +2018,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2084,7 +2030,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2098,12 +2043,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2114,12 +2058,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2130,12 +2073,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2146,12 +2088,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2162,12 +2103,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2708,7 +2648,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2723,7 +2662,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2738,7 +2676,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2753,7 +2690,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2768,7 +2704,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2783,7 +2718,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2798,7 +2732,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2813,7 +2746,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2828,7 +2760,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2843,7 +2774,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2858,7 +2788,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2873,7 +2802,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2888,7 +2816,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2903,7 +2830,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2918,7 +2844,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2935,12 +2860,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2955,12 +2879,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2975,12 +2898,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2995,12 +2917,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3015,12 +2936,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3292,7 +3212,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3305,7 +3224,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3318,7 +3236,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3331,7 +3248,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3344,7 +3260,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3357,7 +3272,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3370,7 +3284,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3383,7 +3296,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3396,7 +3308,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3409,7 +3320,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3422,7 +3332,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3435,7 +3344,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3448,7 +3356,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3461,7 +3368,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3474,7 +3380,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3488,12 +3393,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3504,12 +3408,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3520,12 +3423,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3536,12 +3438,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3552,12 +3453,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3827,7 +3727,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3840,7 +3739,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3853,7 +3751,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3866,7 +3763,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3879,7 +3775,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3892,7 +3787,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3905,7 +3799,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3918,7 +3811,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3931,7 +3823,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3944,7 +3835,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3957,7 +3847,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3970,7 +3859,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3983,7 +3871,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3996,7 +3883,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -4009,7 +3895,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -4023,12 +3908,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4039,12 +3923,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4055,12 +3938,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4071,12 +3953,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4087,12 +3968,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4654,7 +4534,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4671,7 +4550,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4688,7 +4566,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4705,7 +4582,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4722,7 +4598,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4738,7 +4613,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4753,7 +4627,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4768,7 +4641,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4783,7 +4655,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4798,7 +4669,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4813,7 +4683,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4828,7 +4697,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4843,7 +4711,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4858,7 +4725,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4873,7 +4739,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4889,7 +4754,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4907,7 +4771,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4925,7 +4788,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4943,7 +4805,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4961,7 +4822,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5529,7 +5389,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -5546,7 +5405,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -5563,7 +5421,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -5580,7 +5437,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -5597,7 +5453,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -5613,7 +5468,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5628,7 +5482,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5643,7 +5496,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5658,7 +5510,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5673,7 +5524,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5688,7 +5538,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5703,7 +5552,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5718,7 +5566,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5733,7 +5580,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5748,7 +5594,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5764,7 +5609,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5782,7 +5626,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5800,7 +5643,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5818,7 +5660,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5836,7 +5677,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6404,7 +6244,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6421,7 +6260,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6438,7 +6276,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6455,7 +6292,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6472,7 +6308,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6488,7 +6323,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6503,7 +6337,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6518,7 +6351,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6533,7 +6365,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6548,7 +6379,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6563,7 +6393,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6578,7 +6407,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6593,7 +6421,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6608,7 +6435,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6623,7 +6449,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6639,7 +6464,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6657,7 +6481,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6675,7 +6498,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6693,7 +6515,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6711,7 +6532,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7279,7 +7099,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -7296,7 +7115,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -7313,7 +7131,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -7330,7 +7147,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -7347,7 +7163,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -7363,7 +7178,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7378,7 +7192,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7393,7 +7206,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7408,7 +7220,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7423,7 +7234,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7438,7 +7248,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7453,7 +7262,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7468,7 +7276,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7483,7 +7290,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7498,7 +7304,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7514,7 +7319,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7532,7 +7336,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7550,7 +7353,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7568,7 +7370,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7586,7 +7387,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
index 85daa79c01210b..1bead6d694c652 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
@@ -1127,7 +1127,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1140,7 +1139,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1153,7 +1151,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1166,7 +1163,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1179,7 +1175,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1192,7 +1187,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1205,7 +1199,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1218,7 +1211,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1231,7 +1223,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1244,7 +1235,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1257,7 +1247,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1270,7 +1259,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1283,7 +1271,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1296,7 +1283,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1309,7 +1295,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1322,7 +1307,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1336,7 +1320,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1350,7 +1333,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1364,7 +1346,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1912,7 +1892,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1925,7 +1904,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1938,7 +1916,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1951,7 +1928,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1964,7 +1940,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1977,7 +1952,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1990,7 +1964,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -2003,7 +1976,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2016,7 +1988,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2029,7 +2000,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2042,7 +2012,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2055,7 +2024,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2068,7 +2036,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2081,7 +2048,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2094,7 +2060,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2107,7 +2072,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2121,7 +2085,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2135,7 +2098,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2149,7 +2111,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2163,7 +2124,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2707,7 +2667,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2720,7 +2679,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2733,7 +2691,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2746,7 +2703,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2759,7 +2715,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2772,7 +2727,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2785,7 +2739,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2798,7 +2751,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2811,7 +2763,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2824,7 +2775,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2837,7 +2787,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2850,7 +2799,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2863,7 +2811,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2876,7 +2823,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2889,7 +2835,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2903,12 +2848,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2919,12 +2863,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2935,12 +2878,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2951,12 +2893,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2967,12 +2908,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3583,7 +3523,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3598,7 +3537,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3613,7 +3551,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3628,7 +3565,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3643,7 +3579,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3658,7 +3593,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3673,7 +3607,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3688,7 +3621,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3703,7 +3635,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3718,7 +3649,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3733,7 +3663,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3748,7 +3677,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3763,7 +3691,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3778,7 +3705,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3793,7 +3719,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3810,12 +3735,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3830,12 +3754,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3850,12 +3773,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3870,12 +3792,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3890,12 +3811,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4437,7 +4357,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4450,7 +4369,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4463,7 +4381,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4476,7 +4393,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4489,7 +4405,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4502,7 +4417,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4515,7 +4429,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4528,7 +4441,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4541,7 +4453,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4554,7 +4465,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4567,7 +4477,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4580,7 +4489,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4593,7 +4501,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4606,7 +4513,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4619,7 +4525,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4633,12 +4538,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4649,12 +4553,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4665,12 +4568,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4681,12 +4583,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4697,12 +4598,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5242,7 +5142,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5255,7 +5154,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5268,7 +5166,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5281,7 +5178,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5294,7 +5190,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5307,7 +5202,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5320,7 +5214,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5333,7 +5226,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5346,7 +5238,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5359,7 +5250,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5372,7 +5262,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5385,7 +5274,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5398,7 +5286,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5411,7 +5298,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5424,7 +5310,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5438,12 +5323,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5454,12 +5338,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5470,12 +5353,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5486,12 +5368,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5502,12 +5383,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6139,7 +6019,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6156,7 +6035,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6173,7 +6051,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6190,7 +6067,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6207,7 +6083,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6223,7 +6098,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6238,7 +6112,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6253,7 +6126,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6268,7 +6140,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6283,7 +6154,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6298,7 +6168,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6313,7 +6182,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6328,7 +6196,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6343,7 +6210,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6358,7 +6224,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6392,7 +6256,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6410,7 +6273,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6428,7 +6290,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6446,7 +6307,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7084,7 +6944,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7101,7 +6960,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7118,7 +6976,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7135,7 +6992,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7152,7 +7008,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7168,7 +7023,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7183,7 +7037,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7198,7 +7051,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7213,7 +7065,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7228,7 +7079,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7243,7 +7093,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7258,7 +7107,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7273,7 +7121,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7288,7 +7135,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7303,7 +7149,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7319,7 +7164,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7337,7 +7181,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7355,7 +7198,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7373,7 +7215,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7391,7 +7232,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8029,7 +7869,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8046,7 +7885,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8063,7 +7901,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8080,7 +7917,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8097,7 +7933,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8113,7 +7948,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8128,7 +7962,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8143,7 +7976,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8158,7 +7990,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8173,7 +8004,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8188,7 +8018,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8203,7 +8032,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8218,7 +8046,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8233,7 +8060,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8248,7 +8074,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8264,7 +8089,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8282,7 +8106,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8300,7 +8123,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8318,7 +8140,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8336,7 +8157,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8974,7 +8794,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8991,7 +8810,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9008,7 +8826,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9025,7 +8842,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9042,7 +8858,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9058,7 +8873,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9073,7 +8887,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9088,7 +8901,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9103,7 +8915,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9118,7 +8929,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9133,7 +8943,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9148,7 +8957,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9163,7 +8971,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9178,7 +8985,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9193,7 +8999,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9209,7 +9014,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9227,7 +9031,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9245,7 +9048,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9263,7 +9065,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9281,7 +9082,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
index 9780b48ce4b671..51d9766f6a8f92 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
@@ -1127,7 +1127,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1140,7 +1139,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1153,7 +1151,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1166,7 +1163,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1179,7 +1175,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1192,7 +1187,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1205,7 +1199,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1218,7 +1211,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1231,7 +1223,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1244,7 +1235,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1257,7 +1247,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1270,7 +1259,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1283,7 +1271,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1296,7 +1283,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1309,7 +1295,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1322,7 +1307,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1336,7 +1320,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1350,7 +1333,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1364,7 +1346,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1912,7 +1892,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1925,7 +1904,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1938,7 +1916,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1951,7 +1928,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1964,7 +1940,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1977,7 +1952,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1990,7 +1964,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -2003,7 +1976,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2016,7 +1988,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2029,7 +2000,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2042,7 +2012,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2055,7 +2024,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2068,7 +2036,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2081,7 +2048,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2094,7 +2060,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2107,7 +2072,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2121,7 +2085,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2135,7 +2098,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2149,7 +2111,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2163,7 +2124,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2707,7 +2667,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2720,7 +2679,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2733,7 +2691,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2746,7 +2703,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2759,7 +2715,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2772,7 +2727,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2785,7 +2739,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2798,7 +2751,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2811,7 +2763,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2824,7 +2775,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2837,7 +2787,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2850,7 +2799,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2863,7 +2811,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2876,7 +2823,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2889,7 +2835,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2903,12 +2848,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2919,12 +2863,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2935,12 +2878,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2951,12 +2893,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2967,12 +2908,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3583,7 +3523,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3598,7 +3537,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3613,7 +3551,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3628,7 +3565,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3643,7 +3579,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3658,7 +3593,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3673,7 +3607,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3688,7 +3621,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3703,7 +3635,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3718,7 +3649,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3733,7 +3663,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3748,7 +3677,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3763,7 +3691,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3778,7 +3705,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3793,7 +3719,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3810,12 +3735,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3830,12 +3754,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3850,12 +3773,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3870,12 +3792,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3890,12 +3811,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4437,7 +4357,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4450,7 +4369,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4463,7 +4381,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4476,7 +4393,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4489,7 +4405,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4502,7 +4417,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4515,7 +4429,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4528,7 +4441,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4541,7 +4453,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4554,7 +4465,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4567,7 +4477,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4580,7 +4489,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4593,7 +4501,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4606,7 +4513,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4619,7 +4525,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4633,12 +4538,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4649,12 +4553,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4665,12 +4568,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4681,12 +4583,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4697,12 +4598,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5242,7 +5142,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5255,7 +5154,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5268,7 +5166,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5281,7 +5178,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5294,7 +5190,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5307,7 +5202,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5320,7 +5214,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5333,7 +5226,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5346,7 +5238,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5359,7 +5250,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5372,7 +5262,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5385,7 +5274,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5398,7 +5286,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5411,7 +5298,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5424,7 +5310,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5438,12 +5323,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5454,12 +5338,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5470,12 +5353,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5486,12 +5368,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5502,12 +5383,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6139,7 +6019,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6156,7 +6035,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6173,7 +6051,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6190,7 +6067,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6207,7 +6083,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6223,7 +6098,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6238,7 +6112,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6253,7 +6126,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6268,7 +6140,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6283,7 +6154,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6298,7 +6168,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6313,7 +6182,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6328,7 +6196,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6343,7 +6210,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6358,7 +6224,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6392,7 +6256,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6410,7 +6273,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6428,7 +6290,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6446,7 +6307,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7084,7 +6944,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7101,7 +6960,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7118,7 +6976,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7135,7 +6992,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7152,7 +7008,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7168,7 +7023,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7183,7 +7037,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7198,7 +7051,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7213,7 +7065,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7228,7 +7079,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7243,7 +7093,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7258,7 +7107,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7273,7 +7121,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7288,7 +7135,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7303,7 +7149,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7319,7 +7164,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7337,7 +7181,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7355,7 +7198,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7373,7 +7215,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7391,7 +7232,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8029,7 +7869,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8046,7 +7885,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8063,7 +7901,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8080,7 +7917,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8097,7 +7933,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8113,7 +7948,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8128,7 +7962,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8143,7 +7976,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8158,7 +7990,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8173,7 +8004,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8188,7 +8018,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8203,7 +8032,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8218,7 +8046,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8233,7 +8060,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8248,7 +8074,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8264,7 +8089,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8282,7 +8106,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8300,7 +8123,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8318,7 +8140,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8336,7 +8157,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8974,7 +8794,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8991,7 +8810,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9008,7 +8826,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9025,7 +8842,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9042,7 +8858,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9058,7 +8873,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9073,7 +8887,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9088,7 +8901,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9103,7 +8915,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9118,7 +8929,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9133,7 +8943,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9148,7 +8957,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9163,7 +8971,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9178,7 +8985,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9193,7 +8999,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9209,7 +9014,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9227,7 +9031,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9245,7 +9048,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9263,7 +9065,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9281,7 +9082,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
index 0bb582fd33216c..0c3ed9b0f1de0f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -657,7 +657,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -670,7 +669,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -683,7 +681,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -696,7 +693,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -709,7 +705,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -722,7 +717,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -735,7 +729,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -748,7 +741,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -761,7 +753,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -774,7 +765,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -787,7 +777,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -800,7 +789,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -813,7 +801,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -826,7 +813,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -839,7 +825,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -852,7 +837,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -866,7 +850,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -880,7 +863,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -894,7 +876,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -908,7 +889,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1282,7 +1262,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1295,7 +1274,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1308,7 +1286,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1321,7 +1298,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1334,7 +1310,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1347,7 +1322,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1360,7 +1334,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1373,7 +1346,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1386,7 +1358,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1399,7 +1370,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1412,7 +1382,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -1425,7 +1394,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -1438,7 +1406,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -1451,7 +1418,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -1464,7 +1430,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -1477,7 +1442,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1491,7 +1455,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1505,7 +1468,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1519,7 +1481,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1533,7 +1494,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1842,7 +1802,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1855,7 +1814,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1868,7 +1826,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -1881,7 +1838,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1894,7 +1850,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1907,7 +1862,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -1920,7 +1874,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -1933,7 +1886,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -1946,7 +1898,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -1959,7 +1910,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -1972,7 +1922,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -1985,7 +1934,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -1998,7 +1946,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2011,7 +1958,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2024,7 +1970,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2038,12 +1983,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2054,12 +1998,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2070,12 +2013,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2086,12 +2028,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2102,12 +2043,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2658,7 +2598,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2673,7 +2612,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2688,7 +2626,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2703,7 +2640,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2718,7 +2654,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2733,7 +2668,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2748,7 +2682,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2763,7 +2696,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2778,7 +2710,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2793,7 +2724,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2808,7 +2738,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2823,7 +2752,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2838,7 +2766,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2853,7 +2780,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2868,7 +2794,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2885,12 +2810,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2905,12 +2829,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2925,12 +2848,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2945,12 +2867,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2965,12 +2886,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3252,7 +3172,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3265,7 +3184,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3278,7 +3196,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3291,7 +3208,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3304,7 +3220,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3317,7 +3232,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -3330,7 +3244,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -3343,7 +3256,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -3356,7 +3268,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -3369,7 +3280,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -3382,7 +3292,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -3395,7 +3304,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -3408,7 +3316,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -3421,7 +3328,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -3434,7 +3340,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -3448,12 +3353,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3464,12 +3368,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3480,12 +3383,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3496,12 +3398,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3512,12 +3413,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3797,7 +3697,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3810,7 +3709,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3823,7 +3721,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3836,7 +3733,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3849,7 +3745,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3862,7 +3757,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -3875,7 +3769,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -3888,7 +3781,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -3901,7 +3793,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -3914,7 +3805,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -3927,7 +3817,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -3940,7 +3829,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -3953,7 +3841,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -3966,7 +3853,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -3979,7 +3865,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -3993,12 +3878,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4009,12 +3893,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4025,12 +3908,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4041,12 +3923,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4057,12 +3938,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4354,7 +4234,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4371,7 +4250,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4388,7 +4266,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -4405,7 +4282,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -4422,7 +4298,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -4438,7 +4313,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -4453,7 +4327,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -4468,7 +4341,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -4483,7 +4355,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -4498,7 +4369,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -4513,7 +4383,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -4528,7 +4397,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -4543,7 +4411,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -4558,7 +4425,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -4573,7 +4439,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -4589,7 +4454,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -4607,7 +4471,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -4625,7 +4488,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -4643,7 +4505,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -4661,7 +4522,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -4959,7 +4819,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -4976,7 +4835,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -4993,7 +4851,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -5010,7 +4867,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -5027,7 +4883,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -5043,7 +4898,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5058,7 +4912,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5073,7 +4926,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5088,7 +4940,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5103,7 +4954,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5118,7 +4968,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5133,7 +4982,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5148,7 +4996,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5163,7 +5010,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5178,7 +5024,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5194,7 +5039,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5212,7 +5056,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5230,7 +5073,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5248,7 +5090,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5266,7 +5107,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -5564,7 +5404,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -5581,7 +5420,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -5598,7 +5436,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -5615,7 +5452,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -5632,7 +5468,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -5648,7 +5483,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -5663,7 +5497,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -5678,7 +5511,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -5693,7 +5525,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -5708,7 +5539,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -5723,7 +5553,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -5738,7 +5567,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -5753,7 +5581,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -5768,7 +5595,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -5783,7 +5609,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -5799,7 +5624,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -5817,7 +5641,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -5835,7 +5658,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -5853,7 +5675,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -5871,7 +5692,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -6169,7 +5989,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -6186,7 +6005,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -6203,7 +6021,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -6220,7 +6037,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -6237,7 +6053,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -6253,7 +6068,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6268,7 +6082,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6283,7 +6096,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6298,7 +6110,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6313,7 +6124,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6328,7 +6138,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6343,7 +6152,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6358,7 +6166,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6373,7 +6180,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6388,7 +6194,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6404,7 +6209,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6422,7 +6226,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6440,7 +6243,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6458,7 +6260,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6476,7 +6277,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
index 171ede54699795..a58e5a987bb4c9 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
@@ -1127,7 +1127,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1140,7 +1139,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1153,7 +1151,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
; -O1: add w8, w0, w20
@@ -1166,7 +1163,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1179,7 +1175,6 @@ define dso_local i16 @atomicrmw_add_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1192,7 +1187,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
; -O1: add w8, w0, w20
@@ -1205,7 +1199,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
; -O1: add w8, w0, w20
@@ -1218,7 +1211,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_release:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
; -O1: add w8, w0, w20
@@ -1231,7 +1223,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
; -O1: add w8, w0, w20
@@ -1244,7 +1235,6 @@ define dso_local i32 @atomicrmw_add_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O0: add w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
; -O1: add w8, w0, w20
@@ -1257,7 +1247,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
; -O1: add x8, x0, x20
@@ -1270,7 +1259,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
; -O1: add x8, x0, x20
@@ -1283,7 +1271,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_release:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
; -O1: add x8, x0, x20
@@ -1296,7 +1283,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
; -O1: add x8, x0, x20
@@ -1309,7 +1295,6 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O0: add x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
; -O1: add x8, x0, x20
@@ -1322,7 +1307,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -1336,7 +1320,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -1350,7 +1333,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -1364,7 +1346,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -1378,7 +1359,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O0: adds x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -1912,7 +1892,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1925,7 +1904,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -1938,7 +1916,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
; -O1: sub w8, w0, w20
@@ -1951,7 +1928,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -1964,7 +1940,6 @@ define dso_local i16 @atomicrmw_sub_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -1977,7 +1952,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
; -O1: sub w8, w0, w20
@@ -1990,7 +1964,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
; -O1: sub w8, w0, w20
@@ -2003,7 +1976,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
; -O1: sub w8, w0, w20
@@ -2016,7 +1988,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
; -O1: sub w8, w0, w20
@@ -2029,7 +2000,6 @@ define dso_local i32 @atomicrmw_sub_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O0: subs w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
; -O1: sub w8, w0, w20
@@ -2042,7 +2012,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
; -O1: sub x8, x0, x20
@@ -2055,7 +2024,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
; -O1: sub x8, x0, x20
@@ -2068,7 +2036,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
; -O1: sub x8, x0, x20
@@ -2081,7 +2048,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
; -O1: sub x8, x0, x20
@@ -2094,7 +2060,6 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O0: subs x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
; -O1: sub x8, x0, x20
@@ -2107,7 +2072,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -2121,7 +2085,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -2135,7 +2098,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -2149,7 +2111,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -2163,7 +2124,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O0: subs x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -2707,7 +2667,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2720,7 +2679,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2733,7 +2691,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -2746,7 +2703,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2759,7 +2715,6 @@ define dso_local i16 @atomicrmw_and_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2772,7 +2727,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -2785,7 +2739,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -2798,7 +2751,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_release:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -2811,7 +2763,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -2824,7 +2775,6 @@ define dso_local i32 @atomicrmw_and_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O0: and w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -2837,7 +2787,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -2850,7 +2799,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -2863,7 +2811,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_release:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -2876,7 +2823,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -2889,7 +2835,6 @@ define dso_local i64 @atomicrmw_and_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O0: and x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -2903,12 +2848,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2919,12 +2863,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2935,12 +2878,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2951,12 +2893,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2967,12 +2908,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: and x8, x11, x8
; -O0: and x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3583,7 +3523,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3598,7 +3537,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3613,7 +3551,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
; -O1: and w8, w0, w20
@@ -3628,7 +3565,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3643,7 +3579,6 @@ define dso_local i16 @atomicrmw_nand_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3658,7 +3593,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
; -O1: and w8, w0, w20
@@ -3673,7 +3607,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
; -O1: and w8, w0, w20
@@ -3688,7 +3621,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
; -O1: and w8, w0, w20
@@ -3703,7 +3635,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
; -O1: and w8, w0, w20
@@ -3718,7 +3649,6 @@ define dso_local i32 @atomicrmw_nand_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: and w8, w9, w8
; -O0: mvn w8, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
; -O1: and w8, w0, w20
@@ -3733,7 +3663,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
; -O1: and x8, x0, x20
@@ -3748,7 +3677,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
; -O1: and x8, x0, x20
@@ -3763,7 +3691,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
; -O1: and x8, x0, x20
@@ -3778,7 +3705,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
; -O1: and x8, x0, x20
@@ -3793,7 +3719,6 @@ define dso_local i64 @atomicrmw_nand_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: and x8, x9, x8
; -O0: mvn x8, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
; -O1: and x8, x0, x20
@@ -3810,12 +3735,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3830,12 +3754,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3850,12 +3773,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3870,12 +3792,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3890,12 +3811,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: mvn x8, x8
; -O0: mvn x9, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: and x8, x0, x21
-; -O1: and x9, x1, x19
+; -O1: and x8, x1, x19
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4437,7 +4357,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_monotonic(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4450,7 +4369,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acquire(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4463,7 +4381,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_release(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4476,7 +4393,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4489,7 +4405,6 @@ define dso_local i16 @atomicrmw_or_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
; -O0-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4502,7 +4417,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_monotonic(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
; -O1: orr w8, w0, w20
@@ -4515,7 +4429,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acquire(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
; -O1: orr w8, w0, w20
@@ -4528,7 +4441,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_release(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_release:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
; -O1: orr w8, w0, w20
@@ -4541,7 +4453,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
; -O1: orr w8, w0, w20
@@ -4554,7 +4465,6 @@ define dso_local i32 @atomicrmw_or_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
; -O0-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O0: orr w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
; -O1: orr w8, w0, w20
@@ -4567,7 +4477,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_monotonic(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
; -O1: orr x8, x0, x20
@@ -4580,7 +4489,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acquire(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
; -O1: orr x8, x0, x20
@@ -4593,7 +4501,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_release(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_release:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
; -O1: orr x8, x0, x20
@@ -4606,7 +4513,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
; -O1: orr x8, x0, x20
@@ -4619,7 +4525,6 @@ define dso_local i64 @atomicrmw_or_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
; -O0-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O0: orr x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
; -O1: orr x8, x0, x20
@@ -4633,12 +4538,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_monotonic(ptr %ptr, i128 %val
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4649,12 +4553,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acquire(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4665,12 +4568,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_release(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4681,12 +4583,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_acq_rel(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4697,12 +4598,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
; -O0: orr x8, x11, x8
; -O0: orr x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: orr x8, x0, x21
-; -O1: orr x9, x1, x19
+; -O1: orr x8, x1, x19
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5242,7 +5142,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5255,7 +5154,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5268,7 +5166,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5281,7 +5178,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5294,7 +5190,6 @@ define dso_local i16 @atomicrmw_xor_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5307,7 +5202,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
; -O1: eor w8, w0, w20
@@ -5320,7 +5214,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
; -O1: eor w8, w0, w20
@@ -5333,7 +5226,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
; -O1: eor w8, w0, w20
@@ -5346,7 +5238,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
; -O1: eor w8, w0, w20
@@ -5359,7 +5250,6 @@ define dso_local i32 @atomicrmw_xor_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O0: eor w8, w9, w8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
; -O1: eor w8, w0, w20
@@ -5372,7 +5262,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
; -O1: eor x8, x0, x20
@@ -5385,7 +5274,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
; -O1: eor x8, x0, x20
@@ -5398,7 +5286,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
; -O1: eor x8, x0, x20
@@ -5411,7 +5298,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
; -O1: eor x8, x0, x20
@@ -5424,7 +5310,6 @@ define dso_local i64 @atomicrmw_xor_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O0: eor x8, x9, x8
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
; -O1: eor x8, x0, x20
@@ -5438,12 +5323,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5454,12 +5338,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5470,12 +5353,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5486,12 +5368,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5502,12 +5383,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: eor x8, x11, x8
; -O0: eor x9, x10, x9
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
-; -O1: eor x8, x0, x21
-; -O1: eor x9, x1, x19
+; -O1: eor x8, x1, x19
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6139,7 +6019,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -6156,7 +6035,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -6173,7 +6051,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -6190,7 +6067,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -6207,7 +6083,6 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -6223,7 +6098,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -6238,7 +6112,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -6253,7 +6126,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -6268,7 +6140,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -6283,7 +6154,6 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -6298,7 +6168,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -6313,7 +6182,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -6328,7 +6196,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -6343,7 +6210,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -6358,7 +6224,6 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, gt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -6374,7 +6239,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -6392,7 +6256,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -6410,7 +6273,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -6428,7 +6290,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -6446,7 +6307,6 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, lt
; -O0: csel x9, x10, x9, lt
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -7084,7 +6944,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
@@ -7101,7 +6960,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
@@ -7118,7 +6976,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
@@ -7135,7 +6992,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
@@ -7152,7 +7008,6 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, sxth
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
@@ -7168,7 +7023,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -7183,7 +7037,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -7198,7 +7051,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -7213,7 +7065,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -7228,7 +7079,6 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -7243,7 +7093,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -7258,7 +7107,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -7273,7 +7121,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -7288,7 +7135,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -7303,7 +7149,6 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, le
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -7319,7 +7164,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -7337,7 +7181,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -7355,7 +7198,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -7373,7 +7215,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -7391,7 +7232,6 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
; -O0: csel x8, x11, x8, ge
; -O0: csel x9, x10, x9, ge
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8029,7 +7869,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8046,7 +7885,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -8063,7 +7901,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -8080,7 +7917,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -8097,7 +7933,6 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -8113,7 +7948,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -8128,7 +7962,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -8143,7 +7976,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -8158,7 +7990,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -8173,7 +8004,6 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -8188,7 +8018,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -8203,7 +8032,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -8218,7 +8046,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -8233,7 +8060,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -8248,7 +8074,6 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, hi
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -8264,7 +8089,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -8282,7 +8106,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -8300,7 +8123,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -8318,7 +8140,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -8336,7 +8157,6 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, lo
; -O0: csel x9, x10, x9, lo
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
@@ -8974,7 +8794,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
@@ -8991,7 +8810,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
@@ -9008,7 +8826,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
@@ -9025,7 +8842,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
@@ -9042,7 +8858,6 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
; -O0: subs w10, w10, w8, uxth
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
@@ -9058,7 +8873,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
; -O1: cmp w0, w20
@@ -9073,7 +8887,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
; -O1: cmp w0, w20
@@ -9088,7 +8901,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
; -O1: cmp w0, w20
@@ -9103,7 +8915,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
; -O1: cmp w0, w20
@@ -9118,7 +8929,6 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
; -O0: subs w10, w9, w8
; -O0: csel w8, w9, w8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
; -O1: cmp w0, w20
@@ -9133,7 +8943,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
; -O1: cmp x0, x20
@@ -9148,7 +8957,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
; -O1: cmp x0, x20
@@ -9163,7 +8971,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
; -O1: cmp x0, x20
@@ -9178,7 +8985,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
; -O1: cmp x0, x20
@@ -9193,7 +8999,6 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
; -O0: subs x10, x9, x8
; -O0: csel x8, x9, x8, ls
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
; -O1: cmp x0, x20
@@ -9209,7 +9014,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
@@ -9227,7 +9031,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
@@ -9245,7 +9048,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
@@ -9263,7 +9065,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
@@ -9281,7 +9082,6 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %val
; -O0: csel x8, x11, x8, hs
; -O0: csel x9, x10, x9, hs
; -O0: bl __atomic_compare_exchange
-; -O0: and w8, w0, #0xff
;
; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index c3db86c0c20eb3..a38ade7cdbf06b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -8548,8 +8548,8 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #88
-; CHECK-THUMB8BASE-NEXT: sub sp, #88
+; CHECK-THUMB8BASE-NEXT: .pad #72
+; CHECK-THUMB8BASE-NEXT: sub sp, #72
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -8600,42 +8600,33 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: bne .LBB40_1
+; CHECK-THUMB8BASE-NEXT: beq .LBB40_1
; CHECK-THUMB8BASE-NEXT: b .LBB40_8
; CHECK-THUMB8BASE-NEXT: .LBB40_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #88
+; CHECK-THUMB8BASE-NEXT: add sp, #72
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw max ptr @atomic_i64, i64 1 monotonic
@@ -8873,8 +8864,8 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #88
-; CHECK-THUMB8BASE-NEXT: sub sp, #88
+; CHECK-THUMB8BASE-NEXT: .pad #72
+; CHECK-THUMB8BASE-NEXT: sub sp, #72
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -8925,42 +8916,33 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB41_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: bne .LBB41_1
+; CHECK-THUMB8BASE-NEXT: beq .LBB41_1
; CHECK-THUMB8BASE-NEXT: b .LBB41_8
; CHECK-THUMB8BASE-NEXT: .LBB41_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #88
+; CHECK-THUMB8BASE-NEXT: add sp, #72
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw min ptr @atomic_i64, i64 1 monotonic
@@ -9198,8 +9180,8 @@ define i64 @test_umax_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #88
-; CHECK-THUMB8BASE-NEXT: sub sp, #88
+; CHECK-THUMB8BASE-NEXT: .pad #72
+; CHECK-THUMB8BASE-NEXT: sub sp, #72
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -9250,42 +9232,33 @@ define i64 @test_umax_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB42_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: bne .LBB42_1
+; CHECK-THUMB8BASE-NEXT: beq .LBB42_1
; CHECK-THUMB8BASE-NEXT: b .LBB42_8
; CHECK-THUMB8BASE-NEXT: .LBB42_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #88
+; CHECK-THUMB8BASE-NEXT: add sp, #72
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw umax ptr @atomic_i64, i64 1 monotonic
@@ -9523,8 +9496,8 @@ define i64 @test_umin_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #88
-; CHECK-THUMB8BASE-NEXT: sub sp, #88
+; CHECK-THUMB8BASE-NEXT: .pad #72
+; CHECK-THUMB8BASE-NEXT: sub sp, #72
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
@@ -9575,42 +9548,33 @@ define i64 @test_umin_i64() {
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB43_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #72]
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #76]
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #72]
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #80
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #84]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #80]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #64]
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: lsls r2, r2, #24
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: bne .LBB43_1
+; CHECK-THUMB8BASE-NEXT: beq .LBB43_1
; CHECK-THUMB8BASE-NEXT: b .LBB43_8
; CHECK-THUMB8BASE-NEXT: .LBB43_8: @ %atomicrmw.end
; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #88
+; CHECK-THUMB8BASE-NEXT: add sp, #72
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw umin ptr @atomic_i64, i64 1 monotonic
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
index f4d006723a7dbe..db71eae97544db 100644
--- a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -67,25 +67,20 @@ define i8 @rmw8(ptr %p) {
define i8 @cmpxchg8(ptr %p) {
; NO-ATOMIC32-LABEL: cmpxchg8:
; NO-ATOMIC32: @ %bb.0:
-; NO-ATOMIC32-NEXT: .save {r4, lr}
-; NO-ATOMIC32-NEXT: push {r4, lr}
-; NO-ATOMIC32-NEXT: .pad #16
-; NO-ATOMIC32-NEXT: sub sp, #16
-; NO-ATOMIC32-NEXT: add r1, sp, #8
-; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: movs r2, #0
; NO-ATOMIC32-NEXT: strb r2, [r1]
-; NO-ATOMIC32-NEXT: add r4, sp, #12
-; NO-ATOMIC32-NEXT: movs r1, #0
-; NO-ATOMIC32-NEXT: strb r1, [r4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: mov r1, r4
+; NO-ATOMIC32-NEXT: movs r2, #1
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
-; NO-ATOMIC32-NEXT: ldrb r0, [r4]
-; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: strb r0, [r1]
-; NO-ATOMIC32-NEXT: add sp, #16
-; NO-ATOMIC32-NEXT: pop {r4, pc}
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: cmpxchg8:
; ATOMIC32: @ %bb.0:
@@ -167,25 +162,20 @@ define i16 @rmw16(ptr %p) {
define i16 @cmpxchg16(ptr %p) {
; NO-ATOMIC32-LABEL: cmpxchg16:
; NO-ATOMIC32: @ %bb.0:
-; NO-ATOMIC32-NEXT: .save {r4, lr}
-; NO-ATOMIC32-NEXT: push {r4, lr}
-; NO-ATOMIC32-NEXT: .pad #16
-; NO-ATOMIC32-NEXT: sub sp, #16
-; NO-ATOMIC32-NEXT: add r1, sp, #8
-; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: movs r2, #0
; NO-ATOMIC32-NEXT: strh r2, [r1]
-; NO-ATOMIC32-NEXT: add r4, sp, #12
-; NO-ATOMIC32-NEXT: movs r1, #0
-; NO-ATOMIC32-NEXT: strh r1, [r4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: mov r1, r4
+; NO-ATOMIC32-NEXT: movs r2, #1
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
-; NO-ATOMIC32-NEXT: ldrh r0, [r4]
-; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: strh r0, [r1]
-; NO-ATOMIC32-NEXT: add sp, #16
-; NO-ATOMIC32-NEXT: pop {r4, pc}
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: cmpxchg16:
; ATOMIC32: @ %bb.0:
@@ -269,19 +259,17 @@ define i32 @cmpxchg32(ptr %p) {
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
-; NO-ATOMIC32-NEXT: .pad #16
-; NO-ATOMIC32-NEXT: sub sp, #16
-; NO-ATOMIC32-NEXT: movs r2, #1
-; NO-ATOMIC32-NEXT: str r2, [sp, #8]
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
; NO-ATOMIC32-NEXT: movs r1, #0
-; NO-ATOMIC32-NEXT: str r1, [sp, #12]
+; NO-ATOMIC32-NEXT: str r1, [sp, #4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: add r1, sp, #12
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: movs r2, #1
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_4
-; NO-ATOMIC32-NEXT: ldr r0, [sp, #12]
-; NO-ATOMIC32-NEXT: str r0, [sp, #4]
-; NO-ATOMIC32-NEXT: add sp, #16
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: cmpxchg32:
@@ -352,24 +340,20 @@ define i64 @cmpxchg64(ptr %p) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #32
-; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: str r3, [sp, #20]
-; CHECK-NEXT: movs r2, #1
-; CHECK-NEXT: str r2, [sp, #16]
-; CHECK-NEXT: str r3, [sp, #28]
-; CHECK-NEXT: str r3, [sp, #24]
+; CHECK-NEXT: str r3, [sp, #12]
+; CHECK-NEXT: str r3, [sp, #8]
; CHECK-NEXT: movs r1, #5
; CHECK-NEXT: str r1, [sp]
; CHECK-NEXT: str r1, [sp, #4]
-; CHECK-NEXT: add r1, sp, #24
+; CHECK-NEXT: add r1, sp, #8
+; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: bl __atomic_compare_exchange_8
-; CHECK-NEXT: ldr r1, [sp, #28]
-; CHECK-NEXT: str r1, [sp, #12]
-; CHECK-NEXT: ldr r0, [sp, #24]
-; CHECK-NEXT: str r0, [sp, #8]
-; CHECK-NEXT: add sp, #32
+; CHECK-NEXT: ldr r1, [sp, #12]
+; CHECK-NEXT: ldr r0, [sp, #8]
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: pop {r7, pc}
%res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
%res.0 = extractvalue { i64, i1 } %res, 0
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 3f28334232e636..0d231769ac505c 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -184,11 +184,10 @@ define i128 @add(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: add:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -112(r1)
-; PPC-PWR8-NEXT: stw r0, 116(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -196,67 +195,63 @@ define i128 @add(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r24, r1, 40
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 56
-; PPC-PWR8-NEXT: addi r23, r1, 16
-; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB1_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: addc r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 60(r1)
-; PPC-PWR8-NEXT: stw r3, 56(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: adde r8, r5, r29
-; PPC-PWR8-NEXT: stw r5, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 68(r1)
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: adde r4, r4, r28
-; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: stw r7, 28(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: adde r3, r3, r27
-; PPC-PWR8-NEXT: stw r8, 48(r1)
+; PPC-PWR8-NEXT: stw r8, 24(r1)
; PPC-PWR8-NEXT: li r8, 5
-; PPC-PWR8-NEXT: stw r4, 44(r1)
+; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r3, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
-; PPC-PWR8-NEXT: andi. r3, r3, 255
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: bne cr0, .LBB1_1
+; PPC-PWR8-NEXT: mr r7, r3
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: cmplwi r7, 0
+; PPC-PWR8-NEXT: beq cr0, .LBB1_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 116(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 112
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -331,11 +326,10 @@ define i128 @sub(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: sub:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -112(r1)
-; PPC-PWR8-NEXT: stw r0, 116(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -343,67 +337,63 @@ define i128 @sub(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r24, r1, 40
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 56
-; PPC-PWR8-NEXT: addi r23, r1, 16
-; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB2_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: subc r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 60(r1)
-; PPC-PWR8-NEXT: stw r3, 56(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: subfe r8, r29, r5
-; PPC-PWR8-NEXT: stw r5, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 68(r1)
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: subfe r4, r28, r4
-; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: stw r7, 28(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: subfe r3, r27, r3
-; PPC-PWR8-NEXT: stw r8, 48(r1)
+; PPC-PWR8-NEXT: stw r8, 24(r1)
; PPC-PWR8-NEXT: li r8, 5
-; PPC-PWR8-NEXT: stw r4, 44(r1)
+; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r3, 40(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
-; PPC-PWR8-NEXT: andi. r3, r3, 255
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: bne cr0, .LBB2_1
+; PPC-PWR8-NEXT: mr r7, r3
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: cmplwi r7, 0
+; PPC-PWR8-NEXT: beq cr0, .LBB2_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 116(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 112
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -478,11 +468,10 @@ define i128 @and(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: and:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -112(r1)
-; PPC-PWR8-NEXT: stw r0, 116(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -490,67 +479,63 @@ define i128 @and(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r24, r1, 40
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 56
-; PPC-PWR8-NEXT: addi r23, r1, 16
-; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB3_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 56(r1)
-; PPC-PWR8-NEXT: and r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 60(r1)
-; PPC-PWR8-NEXT: and r8, r5, r29
-; PPC-PWR8-NEXT: and r4, r4, r28
+; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: and r3, r3, r27
-; PPC-PWR8-NEXT: stw r5, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 68(r1)
-; PPC-PWR8-NEXT: stw r3, 40(r1)
-; PPC-PWR8-NEXT: stw r4, 44(r1)
-; PPC-PWR8-NEXT: li r3, 16
-; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r8, 48(r1)
-; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: and r4, r4, r28
+; PPC-PWR8-NEXT: and r7, r5, r29
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: and r5, r6, r30
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: stw r5, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
-; PPC-PWR8-NEXT: andi. r3, r3, 255
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: bne cr0, .LBB3_1
+; PPC-PWR8-NEXT: mr r7, r3
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: cmplwi r7, 0
+; PPC-PWR8-NEXT: beq cr0, .LBB3_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 116(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 112
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -625,11 +610,10 @@ define i128 @or(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: or:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -112(r1)
-; PPC-PWR8-NEXT: stw r0, 116(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -637,67 +621,63 @@ define i128 @or(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r24, r1, 40
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 56
-; PPC-PWR8-NEXT: addi r23, r1, 16
-; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB4_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 56(r1)
-; PPC-PWR8-NEXT: or r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 60(r1)
-; PPC-PWR8-NEXT: or r8, r5, r29
-; PPC-PWR8-NEXT: or r4, r4, r28
+; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: or r3, r3, r27
-; PPC-PWR8-NEXT: stw r5, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 68(r1)
-; PPC-PWR8-NEXT: stw r3, 40(r1)
-; PPC-PWR8-NEXT: stw r4, 44(r1)
-; PPC-PWR8-NEXT: li r3, 16
-; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r8, 48(r1)
-; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: or r4, r4, r28
+; PPC-PWR8-NEXT: or r7, r5, r29
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: or r5, r6, r30
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: stw r5, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
-; PPC-PWR8-NEXT: andi. r3, r3, 255
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: bne cr0, .LBB4_1
+; PPC-PWR8-NEXT: mr r7, r3
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: cmplwi r7, 0
+; PPC-PWR8-NEXT: beq cr0, .LBB4_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 116(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 112
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -772,11 +752,10 @@ define i128 @xor(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: xor:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -112(r1)
-; PPC-PWR8-NEXT: stw r0, 116(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -784,67 +763,63 @@ define i128 @xor(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r24, r1, 40
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 56
-; PPC-PWR8-NEXT: addi r23, r1, 16
-; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB5_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 56(r1)
-; PPC-PWR8-NEXT: xor r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 60(r1)
-; PPC-PWR8-NEXT: xor r8, r5, r29
-; PPC-PWR8-NEXT: xor r4, r4, r28
+; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: xor r3, r3, r27
-; PPC-PWR8-NEXT: stw r5, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 68(r1)
-; PPC-PWR8-NEXT: stw r3, 40(r1)
-; PPC-PWR8-NEXT: stw r4, 44(r1)
-; PPC-PWR8-NEXT: li r3, 16
-; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r8, 48(r1)
-; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: xor r4, r4, r28
+; PPC-PWR8-NEXT: xor r7, r5, r29
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: xor r5, r6, r30
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: stw r5, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
-; PPC-PWR8-NEXT: andi. r3, r3, 255
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: bne cr0, .LBB5_1
+; PPC-PWR8-NEXT: mr r7, r3
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: cmplwi r7, 0
+; PPC-PWR8-NEXT: beq cr0, .LBB5_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 116(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 112
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -919,11 +894,10 @@ define i128 @nand(ptr %a, i128 %x) {
; PPC-PWR8-LABEL: nand:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -112(r1)
-; PPC-PWR8-NEXT: stw r0, 116(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 112
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: stw r0, 84(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r23, -36
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
@@ -931,67 +905,63 @@ define i128 @nand(ptr %a, i128 %x) {
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
-; PPC-PWR8-NEXT: stw r26, 88(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r27, 92(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
-; PPC-PWR8-NEXT: stw r28, 96(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
-; PPC-PWR8-NEXT: stw r23, 76(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
-; PPC-PWR8-NEXT: stw r24, 80(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r24, r1, 40
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: lwz r3, 0(r3)
-; PPC-PWR8-NEXT: stw r25, 84(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r25, r1, 56
-; PPC-PWR8-NEXT: addi r23, r1, 16
-; PPC-PWR8-NEXT: stw r29, 100(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: stw r30, 104(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB6_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
-; PPC-PWR8-NEXT: stw r3, 56(r1)
-; PPC-PWR8-NEXT: nand r7, r6, r30
-; PPC-PWR8-NEXT: stw r4, 60(r1)
-; PPC-PWR8-NEXT: nand r8, r5, r29
-; PPC-PWR8-NEXT: nand r4, r4, r28
+; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: nand r3, r3, r27
-; PPC-PWR8-NEXT: stw r5, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 68(r1)
-; PPC-PWR8-NEXT: stw r3, 40(r1)
-; PPC-PWR8-NEXT: stw r4, 44(r1)
-; PPC-PWR8-NEXT: li r3, 16
-; PPC-PWR8-NEXT: mr r4, r26
-; PPC-PWR8-NEXT: stw r8, 48(r1)
-; PPC-PWR8-NEXT: stw r7, 52(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: nand r4, r4, r28
+; PPC-PWR8-NEXT: nand r7, r5, r29
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: nand r5, r6, r30
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: stw r5, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r25
-; PPC-PWR8-NEXT: andi. r3, r3, 255
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r23
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: bne cr0, .LBB6_1
+; PPC-PWR8-NEXT: mr r7, r3
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: cmplwi r7, 0
+; PPC-PWR8-NEXT: beq cr0, .LBB6_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
-; PPC-PWR8-NEXT: lwz r30, 104(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r29, 100(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r28, 96(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r27, 92(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r26, 88(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r25, 84(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r24, 80(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r23, 76(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: lwz r0, 116(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 112
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1025,31 +995,22 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_weak_acquire_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -176(r1)
-; PWR7-NEXT: std r0, 192(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 176
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; PWR7-NEXT: addi r30, r1, 144
-; PWR7-NEXT: std r5, 152(r1)
-; PWR7-NEXT: std r7, 136(r1)
-; PWR7-NEXT: std r6, 128(r1)
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 2
; PWR7-NEXT: li r8, 2
-; PWR7-NEXT: std r4, 144(r1)
-; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: lxvd2x vs0, 0, r30
-; PWR7-NEXT: addi r3, r1, 112
-; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; PWR7-NEXT: stxvd2x vs0, 0, r3
-; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: ld r4, 120(r1)
-; PWR7-NEXT: addi r1, r1, 176
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1099,40 +1060,33 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_weak_acquire_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 92(r1)
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r30, r1, 56
-; PPC-PWR8-NEXT: stw r8, 68(r1)
-; PPC-PWR8-NEXT: stw r7, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 60(r1)
-; PPC-PWR8-NEXT: addi r6, r1, 40
-; PPC-PWR8-NEXT: stw r5, 56(r1)
-; PPC-PWR8-NEXT: mr r5, r30
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 2
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r3, 52(r1)
-; PPC-PWR8-NEXT: lwz r3, 88(r1)
-; PPC-PWR8-NEXT: stw r10, 44(r1)
-; PPC-PWR8-NEXT: stw r9, 40(r1)
-; PPC-PWR8-NEXT: stw r3, 48(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
-; PPC-PWR8-NEXT: addi r3, r1, 16
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1166,31 +1120,22 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_weak_release_monotonic:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -176(r1)
-; PWR7-NEXT: std r0, 192(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 176
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; PWR7-NEXT: addi r30, r1, 144
-; PWR7-NEXT: std r5, 152(r1)
-; PWR7-NEXT: std r7, 136(r1)
-; PWR7-NEXT: std r6, 128(r1)
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 3
; PWR7-NEXT: li r8, 0
-; PWR7-NEXT: std r4, 144(r1)
-; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: lxvd2x vs0, 0, r30
-; PWR7-NEXT: addi r3, r1, 112
-; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; PWR7-NEXT: stxvd2x vs0, 0, r3
-; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: ld r4, 120(r1)
-; PWR7-NEXT: addi r1, r1, 176
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1240,40 +1185,33 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_weak_release_monotonic:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 92(r1)
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r30, r1, 56
-; PPC-PWR8-NEXT: stw r8, 68(r1)
-; PPC-PWR8-NEXT: stw r7, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 60(r1)
-; PPC-PWR8-NEXT: addi r6, r1, 40
-; PPC-PWR8-NEXT: stw r5, 56(r1)
-; PPC-PWR8-NEXT: mr r5, r30
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 3
; PPC-PWR8-NEXT: li r8, 0
-; PPC-PWR8-NEXT: stw r3, 52(r1)
-; PPC-PWR8-NEXT: lwz r3, 88(r1)
-; PPC-PWR8-NEXT: stw r10, 44(r1)
-; PPC-PWR8-NEXT: stw r9, 40(r1)
-; PPC-PWR8-NEXT: stw r3, 48(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
-; PPC-PWR8-NEXT: addi r3, r1, 16
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1308,31 +1246,22 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_sc_sc:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -176(r1)
-; PWR7-NEXT: std r0, 192(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 176
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; PWR7-NEXT: addi r30, r1, 144
-; PWR7-NEXT: std r5, 152(r1)
-; PWR7-NEXT: std r7, 136(r1)
-; PWR7-NEXT: std r6, 128(r1)
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 5
; PWR7-NEXT: li r8, 5
-; PWR7-NEXT: std r4, 144(r1)
-; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: lxvd2x vs0, 0, r30
-; PWR7-NEXT: addi r3, r1, 112
-; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; PWR7-NEXT: stxvd2x vs0, 0, r3
-; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: ld r4, 120(r1)
-; PWR7-NEXT: addi r1, r1, 176
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1384,40 +1313,33 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_sc_sc:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 92(r1)
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r30, r1, 56
-; PPC-PWR8-NEXT: stw r8, 68(r1)
-; PPC-PWR8-NEXT: stw r7, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 60(r1)
-; PPC-PWR8-NEXT: addi r6, r1, 40
-; PPC-PWR8-NEXT: stw r5, 56(r1)
-; PPC-PWR8-NEXT: mr r5, r30
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
-; PPC-PWR8-NEXT: stw r3, 52(r1)
-; PPC-PWR8-NEXT: lwz r3, 88(r1)
-; PPC-PWR8-NEXT: stw r10, 44(r1)
-; PPC-PWR8-NEXT: stw r9, 40(r1)
-; PPC-PWR8-NEXT: stw r3, 48(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
-; PPC-PWR8-NEXT: addi r3, r1, 16
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1452,31 +1374,22 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_acqrel_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -176(r1)
-; PWR7-NEXT: std r0, 192(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 176
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; PWR7-NEXT: addi r30, r1, 144
-; PWR7-NEXT: std r5, 152(r1)
-; PWR7-NEXT: std r7, 136(r1)
-; PWR7-NEXT: std r6, 128(r1)
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 4
; PWR7-NEXT: li r8, 2
-; PWR7-NEXT: std r4, 144(r1)
-; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: lxvd2x vs0, 0, r30
-; PWR7-NEXT: addi r3, r1, 112
-; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; PWR7-NEXT: stxvd2x vs0, 0, r3
-; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: ld r4, 120(r1)
-; PWR7-NEXT: addi r1, r1, 176
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1528,40 +1441,33 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_acqrel_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 92(r1)
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r30, r1, 56
-; PPC-PWR8-NEXT: stw r8, 68(r1)
-; PPC-PWR8-NEXT: stw r7, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 60(r1)
-; PPC-PWR8-NEXT: addi r6, r1, 40
-; PPC-PWR8-NEXT: stw r5, 56(r1)
-; PPC-PWR8-NEXT: mr r5, r30
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 4
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r3, 52(r1)
-; PPC-PWR8-NEXT: lwz r3, 88(r1)
-; PPC-PWR8-NEXT: stw r10, 44(r1)
-; PPC-PWR8-NEXT: stw r9, 40(r1)
-; PPC-PWR8-NEXT: stw r3, 48(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
-; PPC-PWR8-NEXT: addi r3, r1, 16
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r3
-; PPC-PWR8-NEXT: lwz r3, 16(r1)
-; PPC-PWR8-NEXT: lwz r4, 20(r1)
-; PPC-PWR8-NEXT: lwz r5, 24(r1)
-; PPC-PWR8-NEXT: lwz r6, 28(r1)
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1599,31 +1505,20 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: cas_acqrel_acquire_check_succ:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -176(r1)
-; PWR7-NEXT: std r0, 192(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 176
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; PWR7-NEXT: addi r30, r1, 144
-; PWR7-NEXT: std r5, 152(r1)
-; PWR7-NEXT: std r7, 136(r1)
-; PWR7-NEXT: std r6, 128(r1)
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 4
; PWR7-NEXT: li r8, 2
-; PWR7-NEXT: std r4, 144(r1)
-; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: lxvd2x vs0, 0, r30
-; PWR7-NEXT: cntlzw r3, r3
-; PWR7-NEXT: addi r4, r1, 112
-; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; PWR7-NEXT: stxvd2x vs0, 0, r4
-; PWR7-NEXT: rlwinm r3, r3, 27, 31, 31
-; PWR7-NEXT: addi r1, r1, 176
+; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1681,39 +1576,29 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 92(r1)
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r30, r1, 56
-; PPC-PWR8-NEXT: stw r8, 68(r1)
-; PPC-PWR8-NEXT: stw r7, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 60(r1)
-; PPC-PWR8-NEXT: addi r6, r1, 40
-; PPC-PWR8-NEXT: stw r5, 56(r1)
-; PPC-PWR8-NEXT: mr r5, r30
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 4
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r3, 52(r1)
-; PPC-PWR8-NEXT: lwz r3, 88(r1)
-; PPC-PWR8-NEXT: stw r10, 44(r1)
-; PPC-PWR8-NEXT: stw r9, 40(r1)
-; PPC-PWR8-NEXT: stw r3, 48(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
-; PPC-PWR8-NEXT: clrlwi r3, r3, 24
-; PPC-PWR8-NEXT: addi r4, r1, 16
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: cntlzw r3, r3
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r4
-; PPC-PWR8-NEXT: rlwinm r3, r3, 27, 31, 31
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
@@ -1751,31 +1636,20 @@ define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PWR7-LABEL: bool_cas_weak_acquire_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
-; PWR7-NEXT: stdu r1, -176(r1)
-; PWR7-NEXT: std r0, 192(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 176
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; PWR7-NEXT: addi r30, r1, 144
-; PWR7-NEXT: std r5, 152(r1)
-; PWR7-NEXT: std r7, 136(r1)
-; PWR7-NEXT: std r6, 128(r1)
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 2
; PWR7-NEXT: li r8, 2
-; PWR7-NEXT: std r4, 144(r1)
-; PWR7-NEXT: mr r4, r30
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
-; PWR7-NEXT: lxvd2x vs0, 0, r30
-; PWR7-NEXT: cntlzw r3, r3
-; PWR7-NEXT: addi r4, r1, 112
-; PWR7-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; PWR7-NEXT: stxvd2x vs0, 0, r4
-; PWR7-NEXT: rlwinm r3, r3, 27, 31, 31
-; PWR7-NEXT: addi r1, r1, 176
+; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
@@ -1831,39 +1705,29 @@ define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
-; PPC-PWR8-NEXT: stwu r1, -80(r1)
-; PPC-PWR8-NEXT: stw r0, 84(r1)
-; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
-; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: mr r4, r3
-; PPC-PWR8-NEXT: lwz r3, 92(r1)
-; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT: addi r30, r1, 56
-; PPC-PWR8-NEXT: stw r8, 68(r1)
-; PPC-PWR8-NEXT: stw r7, 64(r1)
-; PPC-PWR8-NEXT: stw r6, 60(r1)
-; PPC-PWR8-NEXT: addi r6, r1, 40
-; PPC-PWR8-NEXT: stw r5, 56(r1)
-; PPC-PWR8-NEXT: mr r5, r30
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 2
; PPC-PWR8-NEXT: li r8, 2
-; PPC-PWR8-NEXT: stw r3, 52(r1)
-; PPC-PWR8-NEXT: lwz r3, 88(r1)
-; PPC-PWR8-NEXT: stw r10, 44(r1)
-; PPC-PWR8-NEXT: stw r9, 40(r1)
-; PPC-PWR8-NEXT: stw r3, 48(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
-; PPC-PWR8-NEXT: lxvw4x vs0, 0, r30
-; PPC-PWR8-NEXT: clrlwi r3, r3, 24
-; PPC-PWR8-NEXT: addi r4, r1, 16
-; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
-; PPC-PWR8-NEXT: cntlzw r3, r3
-; PPC-PWR8-NEXT: stxvw4x vs0, 0, r4
-; PPC-PWR8-NEXT: rlwinm r3, r3, 27, 31, 31
-; PPC-PWR8-NEXT: lwz r0, 84(r1)
-; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 52495c461e8244..ff5bec53acd257 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -282,28 +282,23 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-LABEL: cas_weak_i64_release_monotonic:
; PPC32: # %bb.0:
; PPC32-NEXT: mflr r0
-; PPC32-NEXT: stwu r1, -32(r1)
-; PPC32-NEXT: stw r0, 36(r1)
-; PPC32-NEXT: .cfi_def_cfa_offset 32
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: .cfi_def_cfa_offset 16
; PPC32-NEXT: .cfi_offset lr, 4
; PPC32-NEXT: li r4, 0
-; PPC32-NEXT: li r5, 1
-; PPC32-NEXT: stw r4, 28(r1)
+; PPC32-NEXT: stw r4, 12(r1)
+; PPC32-NEXT: li r5, 0
+; PPC32-NEXT: stw r4, 8(r1)
+; PPC32-NEXT: addi r4, r1, 8
; PPC32-NEXT: li r6, 1
-; PPC32-NEXT: stw r4, 24(r1)
; PPC32-NEXT: li r7, 3
-; PPC32-NEXT: stw r5, 20(r1)
-; PPC32-NEXT: li r5, 0
-; PPC32-NEXT: stw r4, 16(r1)
-; PPC32-NEXT: addi r4, r1, 24
; PPC32-NEXT: li r8, 0
; PPC32-NEXT: bl __atomic_compare_exchange_8
-; PPC32-NEXT: lwz r4, 28(r1)
-; PPC32-NEXT: lwz r3, 24(r1)
-; PPC32-NEXT: stw r4, 12(r1)
-; PPC32-NEXT: stw r3, 8(r1)
-; PPC32-NEXT: lwz r0, 36(r1)
-; PPC32-NEXT: addi r1, r1, 32
+; PPC32-NEXT: lwz r4, 12(r1)
+; PPC32-NEXT: lwz r3, 8(r1)
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
; PPC32-NEXT: mtlr r0
; PPC32-NEXT: blr
;
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index d812283fc94395..a87b49e61a8dbc 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -30,13 +30,10 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -69,15 +66,10 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -165,13 +157,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -273,15 +262,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -397,13 +381,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -505,15 +486,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -629,13 +605,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -737,15 +710,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -861,13 +829,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -969,15 +934,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1093,13 +1053,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1201,15 +1158,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1325,13 +1277,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1433,15 +1382,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1557,13 +1501,10 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1596,15 +1537,10 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1697,13 +1633,10 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1736,15 +1669,10 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1837,13 +1765,10 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -1876,15 +1801,10 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
-; RV64I-NEXT: slli a2, a2, 56
-; RV64I-NEXT: srai a2, a2, 56
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -1979,13 +1899,10 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2019,15 +1936,10 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2118,13 +2030,10 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2230,15 +2139,10 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2358,13 +2262,10 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2470,15 +2371,10 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2598,13 +2494,10 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2710,15 +2603,10 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -2838,13 +2726,10 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -2950,15 +2835,10 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3078,13 +2958,10 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3190,15 +3067,10 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3318,13 +3190,10 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3430,15 +3299,10 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3558,13 +3422,10 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3598,15 +3459,10 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3702,13 +3558,10 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3742,15 +3595,10 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3846,13 +3694,10 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -3886,15 +3731,10 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: srai a2, a2, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -3992,13 +3832,10 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4032,19 +3869,15 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic:
@@ -4090,13 +3923,10 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4135,19 +3965,15 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_acquire_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic:
@@ -4203,13 +4029,10 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 2
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4248,19 +4071,15 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_acquire_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_acquire:
@@ -4316,13 +4135,10 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4361,19 +4177,15 @@ define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_release_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_release_monotonic:
@@ -4429,13 +4241,10 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 3
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4474,19 +4283,15 @@ define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_release_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_release_acquire:
@@ -4542,13 +4347,10 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4587,19 +4389,15 @@ define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_acq_rel_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic:
@@ -4655,13 +4453,10 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 4
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4700,19 +4495,15 @@ define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_acq_rel_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire:
@@ -4768,13 +4559,10 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4813,19 +4601,15 @@ define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic:
@@ -4881,13 +4665,10 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4926,19 +4707,15 @@ define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire:
@@ -4994,13 +4771,10 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -5041,19 +4815,15 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst:
@@ -5110,90 +4880,47 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV32I: # %bb.0:
-<<<<<<< HEAD
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV32IA: # %bb.0:
-<<<<<<< HEAD
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: addi sp, sp, -16
-; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic:
@@ -5234,84 +4961,49 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acquire_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acquire_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic:
@@ -5362,84 +5054,49 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acquire_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acquire_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acquire_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_acquire:
@@ -5490,84 +5147,49 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_release_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_release_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_release_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_release_monotonic:
@@ -5618,84 +5240,49 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_release_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_release_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_release_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 3
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_release_acquire:
@@ -5746,84 +5333,49 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic:
@@ -5874,84 +5426,49 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 4
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire:
@@ -6002,84 +5519,49 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic:
@@ -6130,84 +5612,49 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 2
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire:
@@ -6258,84 +5705,49 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind {
; RV32I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32I-NEXT: sw a2, 20(sp)
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a2, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: mv a1, sp
-=======
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
->>>>>>> main
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a6
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a0, 16(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: sw a0, 0(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -32
-; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a2, 20(sp)
-; RV32IA-NEXT: sw a1, 16(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
-; RV32IA-NEXT: sw a3, 8(sp)
-; RV32IA-NEXT: addi a1, sp, 16
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a2, 4(sp)
-; RV32IA-NEXT: sw a1, 0(sp)
-; RV32IA-NEXT: mv a1, sp
-=======
; RV32IA-NEXT: sw a1, 0(sp)
; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
->>>>>>> main
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a6
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a0, 16(sp)
-; RV32IA-NEXT: lw a1, 20(sp)
-; RV32IA-NEXT: sw a0, 0(sp)
-; RV32IA-NEXT: sw a1, 4(sp)
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
; RV32IA-NEXT: ret
;
; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a1, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a0, 16(sp)
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst:
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index ac79dc8bc49e39..06594e35be8703 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -595,16 +595,13 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB10_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
-; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sb a3, 13(sp)
-; RV32I-NEXT: beqz a0, .LBB10_4
+; RV32I-NEXT: bnez a0, .LBB10_4
; RV32I-NEXT: .LBB10_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -664,37 +661,33 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a2, 0(a0)
+; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB10_2
; RV64I-NEXT: .LBB10_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1
-; RV64I-NEXT: sb a2, 15(sp)
-; RV64I-NEXT: sb a0, 14(sp)
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a2, a0, 56
+; RV64I-NEXT: sb a3, 15(sp)
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a2, 15(sp)
-; RV64I-NEXT: sb a2, 13(sp)
-; RV64I-NEXT: beqz a0, .LBB10_4
+; RV64I-NEXT: lbu a3, 15(sp)
+; RV64I-NEXT: bnez a0, .LBB10_4
; RV64I-NEXT: .LBB10_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a2, 56
-; RV64I-NEXT: srai a1, a0, 56
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt s2, a1, .LBB10_1
+; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: blt s2, a0, .LBB10_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB10_1
; RV64I-NEXT: .LBB10_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: slli a0, a3, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -754,16 +747,13 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB11_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
-; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sb a3, 13(sp)
-; RV32I-NEXT: beqz a0, .LBB11_4
+; RV32I-NEXT: bnez a0, .LBB11_4
; RV32I-NEXT: .LBB11_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 24
@@ -823,37 +813,33 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a2, 0(a0)
+; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 56
; RV64I-NEXT: srai s2, a0, 56
; RV64I-NEXT: j .LBB11_2
; RV64I-NEXT: .LBB11_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV64I-NEXT: sb a2, 15(sp)
-; RV64I-NEXT: sb a0, 14(sp)
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a2, a0, 56
+; RV64I-NEXT: sb a3, 15(sp)
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a2, 15(sp)
-; RV64I-NEXT: sb a2, 13(sp)
-; RV64I-NEXT: beqz a0, .LBB11_4
+; RV64I-NEXT: lbu a3, 15(sp)
+; RV64I-NEXT: bnez a0, .LBB11_4
; RV64I-NEXT: .LBB11_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a2, 56
-; RV64I-NEXT: srai a1, a0, 56
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bge s2, a1, .LBB11_1
+; RV64I-NEXT: slli a0, a3, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bge s2, a0, .LBB11_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB11_1
; RV64I-NEXT: .LBB11_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: slli a0, a3, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -912,16 +898,13 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB12_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
-; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sb a3, 13(sp)
-; RV32I-NEXT: beqz a0, .LBB12_4
+; RV32I-NEXT: bnez a0, .LBB12_4
; RV32I-NEXT: .LBB12_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -975,35 +958,31 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a2, 0(a0)
+; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB12_2
; RV64I-NEXT: .LBB12_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1
-; RV64I-NEXT: sb a2, 15(sp)
-; RV64I-NEXT: sb a0, 14(sp)
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a2, a0, 56
+; RV64I-NEXT: sb a3, 15(sp)
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a2, 15(sp)
-; RV64I-NEXT: sb a2, 13(sp)
-; RV64I-NEXT: beqz a0, .LBB12_4
+; RV64I-NEXT: lbu a3, 15(sp)
+; RV64I-NEXT: bnez a0, .LBB12_4
; RV64I-NEXT: .LBB12_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a1, a2, 255
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bltu s2, a1, .LBB12_1
+; RV64I-NEXT: andi a0, a3, 255
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bltu s2, a0, .LBB12_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB12_1
; RV64I-NEXT: .LBB12_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: slli a0, a3, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1057,16 +1036,13 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: .LBB13_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32I-NEXT: sb a3, 15(sp)
-; RV32I-NEXT: sb a2, 14(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a3, 15(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sb a3, 13(sp)
-; RV32I-NEXT: beqz a0, .LBB13_4
+; RV32I-NEXT: bnez a0, .LBB13_4
; RV32I-NEXT: .LBB13_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: andi a0, a3, 255
@@ -1120,35 +1096,31 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lbu a2, 0(a0)
+; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: andi s2, a1, 255
; RV64I-NEXT: j .LBB13_2
; RV64I-NEXT: .LBB13_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV64I-NEXT: sb a2, 15(sp)
-; RV64I-NEXT: sb a0, 14(sp)
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a2, a0, 56
+; RV64I-NEXT: sb a3, 15(sp)
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a2, 15(sp)
-; RV64I-NEXT: sb a2, 13(sp)
-; RV64I-NEXT: beqz a0, .LBB13_4
+; RV64I-NEXT: lbu a3, 15(sp)
+; RV64I-NEXT: bnez a0, .LBB13_4
; RV64I-NEXT: .LBB13_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: andi a1, a2, 255
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bgeu s2, a1, .LBB13_1
+; RV64I-NEXT: andi a0, a3, 255
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bgeu s2, a0, .LBB13_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB13_1
; RV64I-NEXT: .LBB13_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a2, 56
+; RV64I-NEXT: slli a0, a3, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1671,16 +1643,13 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB21_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
-; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lhu a3, 14(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sh a3, 10(sp)
-; RV32I-NEXT: beqz a0, .LBB21_4
+; RV32I-NEXT: lh a3, 14(sp)
+; RV32I-NEXT: bnez a0, .LBB21_4
; RV32I-NEXT: .LBB21_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -1742,37 +1711,33 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a2, 0(a0)
+; RV64I-NEXT: lhu a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB21_2
; RV64I-NEXT: .LBB21_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1
-; RV64I-NEXT: sh a2, 14(sp)
-; RV64I-NEXT: sh a0, 12(sp)
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a2, a0, 48
+; RV64I-NEXT: sh a3, 14(sp)
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lhu a2, 14(sp)
-; RV64I-NEXT: sh a2, 10(sp)
-; RV64I-NEXT: beqz a0, .LBB21_4
+; RV64I-NEXT: lh a3, 14(sp)
+; RV64I-NEXT: bnez a0, .LBB21_4
; RV64I-NEXT: .LBB21_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a2, 48
-; RV64I-NEXT: srai a1, a0, 48
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt s2, a1, .LBB21_1
+; RV64I-NEXT: slli a0, a3, 48
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: blt s2, a0, .LBB21_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB21_1
; RV64I-NEXT: .LBB21_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: slli a0, a3, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1834,16 +1799,13 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB22_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1
; RV32I-NEXT: sh a3, 14(sp)
-; RV32I-NEXT: sh a2, 12(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lhu a3, 14(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sh a3, 10(sp)
-; RV32I-NEXT: beqz a0, .LBB22_4
+; RV32I-NEXT: lh a3, 14(sp)
+; RV32I-NEXT: bnez a0, .LBB22_4
; RV32I-NEXT: .LBB22_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: slli a0, a3, 16
@@ -1905,37 +1867,33 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lhu a2, 0(a0)
+; RV64I-NEXT: lhu a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: slli a0, a1, 48
; RV64I-NEXT: srai s2, a0, 48
; RV64I-NEXT: j .LBB22_2
; RV64I-NEXT: .LBB22_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1
-; RV64I-NEXT: sh a2, 14(sp)
-; RV64I-NEXT: sh a0, 12(sp)
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a2, a0, 48
+; RV64I-NEXT: sh a3, 14(sp)
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lhu a2, 14(sp)
-; RV64I-NEXT: sh a2, 10(sp)
-; RV64I-NEXT: beqz a0, .LBB22_4
+; RV64I-NEXT: lh a3, 14(sp)
+; RV64I-NEXT: bnez a0, .LBB22_4
; RV64I-NEXT: .LBB22_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: slli a0, a2, 48
-; RV64I-NEXT: srai a1, a0, 48
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bge s2, a1, .LBB22_1
+; RV64I-NEXT: slli a0, a3, 48
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bge s2, a0, .LBB22_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB22_1
; RV64I-NEXT: .LBB22_4: # %atomicrmw.end
-; RV64I-NEXT: slli a0, a2, 48
+; RV64I-NEXT: slli a0, a3, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1999,16 +1957,13 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB23_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lhu a1, 10(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sh a1, 6(sp)
-; RV32I-NEXT: beqz a0, .LBB23_4
+; RV32I-NEXT: lh a1, 10(sp)
+; RV32I-NEXT: bnez a0, .LBB23_4
; RV32I-NEXT: .LBB23_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -2074,25 +2029,21 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB23_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a0, 4(sp)
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lhu a1, 6(sp)
-; RV64I-NEXT: sh a1, 2(sp)
-; RV64I-NEXT: beqz a0, .LBB23_4
+; RV64I-NEXT: lh a1, 6(sp)
+; RV64I-NEXT: bnez a0, .LBB23_4
; RV64I-NEXT: .LBB23_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a2, a1, s2
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: bltu s3, a2, .LBB23_1
+; RV64I-NEXT: and a0, a1, s2
+; RV64I-NEXT: mv a2, a1
+; RV64I-NEXT: bltu s3, a0, .LBB23_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a2, s0
; RV64I-NEXT: j .LBB23_1
; RV64I-NEXT: .LBB23_4: # %atomicrmw.end
; RV64I-NEXT: slli a0, a1, 48
@@ -2154,16 +2105,13 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: .LBB24_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lhu a1, 10(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sh a1, 6(sp)
-; RV32I-NEXT: beqz a0, .LBB24_4
+; RV32I-NEXT: lh a1, 10(sp)
+; RV32I-NEXT: bnez a0, .LBB24_4
; RV32I-NEXT: .LBB24_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
@@ -2229,25 +2177,21 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64I-NEXT: .LBB24_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a0, 4(sp)
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a2, a0, 48
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lhu a1, 6(sp)
-; RV64I-NEXT: sh a1, 2(sp)
-; RV64I-NEXT: beqz a0, .LBB24_4
+; RV64I-NEXT: lh a1, 6(sp)
+; RV64I-NEXT: bnez a0, .LBB24_4
; RV64I-NEXT: .LBB24_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: and a2, a1, s2
-; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: bgeu s3, a2, .LBB24_1
+; RV64I-NEXT: and a0, a1, s2
+; RV64I-NEXT: mv a2, a1
+; RV64I-NEXT: bgeu s3, a0, .LBB24_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a2, s0
; RV64I-NEXT: j .LBB24_1
; RV64I-NEXT: .LBB24_4: # %atomicrmw.end
; RV64I-NEXT: slli a0, a1, 48
@@ -2582,27 +2526,24 @@ define signext i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB32_2
; RV32I-NEXT: .LBB32_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB32_2 Depth=1
-; RV32I-NEXT: sw a3, 16(sp)
-; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: addi a1, sp, 16
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 16(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB32_4
+; RV32I-NEXT: lw a3, 0(sp)
+; RV32I-NEXT: bnez a0, .LBB32_4
; RV32I-NEXT: .LBB32_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2613,10 +2554,10 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB32_1
; RV32I-NEXT: .LBB32_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i32_monotonic:
@@ -2632,33 +2573,30 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a2, 0(a0)
+; RV64I-NEXT: lw a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB32_2
; RV64I-NEXT: .LBB32_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1
-; RV64I-NEXT: sw a2, 12(sp)
-; RV64I-NEXT: sw a0, 8(sp)
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sw a3, 12(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a2, 12(sp)
-; RV64I-NEXT: sw a2, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB32_4
+; RV64I-NEXT: lw a3, 12(sp)
+; RV64I-NEXT: bnez a0, .LBB32_4
; RV64I-NEXT: .LBB32_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt s2, a2, .LBB32_1
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: blt s2, a3, .LBB32_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB32_1
; RV64I-NEXT: .LBB32_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -2677,27 +2615,24 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB33_2
; RV32I-NEXT: .LBB33_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB33_2 Depth=1
-; RV32I-NEXT: sw a3, 16(sp)
-; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: addi a1, sp, 16
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 16(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB33_4
+; RV32I-NEXT: lw a3, 0(sp)
+; RV32I-NEXT: bnez a0, .LBB33_4
; RV32I-NEXT: .LBB33_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2708,10 +2643,10 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB33_1
; RV32I-NEXT: .LBB33_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i32_monotonic:
@@ -2727,33 +2662,30 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a2, 0(a0)
+; RV64I-NEXT: lw a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB33_2
; RV64I-NEXT: .LBB33_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1
-; RV64I-NEXT: sw a2, 12(sp)
-; RV64I-NEXT: sw a0, 8(sp)
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sw a3, 12(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a2, 12(sp)
-; RV64I-NEXT: sw a2, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB33_4
+; RV64I-NEXT: lw a3, 12(sp)
+; RV64I-NEXT: bnez a0, .LBB33_4
; RV64I-NEXT: .LBB33_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bge s2, a2, .LBB33_1
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bge s2, a3, .LBB33_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB33_1
; RV64I-NEXT: .LBB33_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -2772,27 +2704,24 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB34_2
; RV32I-NEXT: .LBB34_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB34_2 Depth=1
-; RV32I-NEXT: sw a3, 16(sp)
-; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: addi a1, sp, 16
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 16(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB34_4
+; RV32I-NEXT: lw a3, 0(sp)
+; RV32I-NEXT: bnez a0, .LBB34_4
; RV32I-NEXT: .LBB34_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2803,10 +2732,10 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB34_1
; RV32I-NEXT: .LBB34_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i32_monotonic:
@@ -2822,33 +2751,30 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a2, 0(a0)
+; RV64I-NEXT: lw a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB34_2
; RV64I-NEXT: .LBB34_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1
-; RV64I-NEXT: sw a2, 12(sp)
-; RV64I-NEXT: sw a0, 8(sp)
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sw a3, 12(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a2, 12(sp)
-; RV64I-NEXT: sw a2, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB34_4
+; RV64I-NEXT: lw a3, 12(sp)
+; RV64I-NEXT: bnez a0, .LBB34_4
; RV64I-NEXT: .LBB34_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bltu s2, a2, .LBB34_1
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bltu s2, a3, .LBB34_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB34_1
; RV64I-NEXT: .LBB34_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -2867,27 +2793,24 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB35_2
; RV32I-NEXT: .LBB35_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1
-; RV32I-NEXT: sw a3, 16(sp)
-; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: addi a1, sp, 16
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a3, 16(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB35_4
+; RV32I-NEXT: lw a3, 0(sp)
+; RV32I-NEXT: bnez a0, .LBB35_4
; RV32I-NEXT: .LBB35_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a3
@@ -2898,10 +2821,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV32I-NEXT: j .LBB35_1
; RV32I-NEXT: .LBB35_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i32_monotonic:
@@ -2917,33 +2840,30 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lw a2, 0(a0)
+; RV64I-NEXT: lw a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB35_2
; RV64I-NEXT: .LBB35_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1
-; RV64I-NEXT: sw a2, 12(sp)
-; RV64I-NEXT: sw a0, 8(sp)
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sw a3, 12(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a2, 12(sp)
-; RV64I-NEXT: sw a2, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB35_4
+; RV64I-NEXT: lw a3, 12(sp)
+; RV64I-NEXT: bnez a0, .LBB35_4
; RV64I-NEXT: .LBB35_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: bgeu s2, a2, .LBB35_1
+; RV64I-NEXT: mv a2, a3
+; RV64I-NEXT: bgeu s2, a3, .LBB35_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: j .LBB35_1
; RV64I-NEXT: .LBB35_4: # %atomicrmw.end
-; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -3257,11 +3177,11 @@ define signext i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_max_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
@@ -3270,41 +3190,16 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB43_2
; RV32I-NEXT: .LBB43_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-<<<<<<< HEAD
-; RV32I-NEXT: sw a4, 24(sp)
-; RV32I-NEXT: sw a5, 28(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
-=======
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32I-NEXT: lw a5, 28(sp)
-; RV32I-NEXT: lw a4, 24(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB43_7
-||||||| 04f65043bc87
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB43_7
-=======
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB43_7
->>>>>>> main
; RV32I-NEXT: .LBB43_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a4, s1, .LBB43_4
@@ -3325,23 +3220,6 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB43_1
; RV32I-NEXT: .LBB43_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-=======
; RV32I-NEXT: mv a0, a5
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3349,16 +3227,15 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -48
-; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
@@ -3367,41 +3244,16 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB43_2
; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a4, 24(sp)
-; RV32IA-NEXT: sw a5, 28(sp)
-; RV32IA-NEXT: sw a3, 20(sp)
-; RV32IA-NEXT: sw a2, 16(sp)
-; RV32IA-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
-=======
; RV32IA-NEXT: sw a5, 8(sp)
; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32IA-NEXT: lw a5, 28(sp)
-; RV32IA-NEXT: lw a4, 24(sp)
-; RV32IA-NEXT: andi a0, a0, 255
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: beqz a0, .LBB43_7
-||||||| 04f65043bc87
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB43_7
-=======
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB43_7
->>>>>>> main
; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a4, s1, .LBB43_4
@@ -3422,23 +3274,6 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB43_1
; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
-=======
; RV32IA-NEXT: mv a0, a5
; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3446,31 +3281,28 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB43_2
; RV64I-NEXT: .LBB43_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV64I-NEXT: sd a3, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 16(sp)
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: beqz a0, .LBB43_4
+; RV64I-NEXT: ld a3, 0(sp)
+; RV64I-NEXT: bnez a0, .LBB43_4
; RV64I-NEXT: .LBB43_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3481,10 +3313,10 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB43_1
; RV64I-NEXT: .LBB43_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_max_i64_monotonic:
@@ -3498,11 +3330,11 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_min_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
@@ -3511,41 +3343,16 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB44_2
; RV32I-NEXT: .LBB44_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-<<<<<<< HEAD
-; RV32I-NEXT: sw a4, 24(sp)
-; RV32I-NEXT: sw a5, 28(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
-=======
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32I-NEXT: lw a5, 28(sp)
-; RV32I-NEXT: lw a4, 24(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB44_7
-||||||| 04f65043bc87
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB44_7
-=======
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB44_7
->>>>>>> main
; RV32I-NEXT: .LBB44_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a4, s1, .LBB44_4
@@ -3566,23 +3373,6 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB44_1
; RV32I-NEXT: .LBB44_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-=======
; RV32I-NEXT: mv a0, a5
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3590,16 +3380,15 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -48
-; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
@@ -3608,41 +3397,16 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB44_2
; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a4, 24(sp)
-; RV32IA-NEXT: sw a5, 28(sp)
-; RV32IA-NEXT: sw a3, 20(sp)
-; RV32IA-NEXT: sw a2, 16(sp)
-; RV32IA-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
-=======
; RV32IA-NEXT: sw a5, 8(sp)
; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32IA-NEXT: lw a5, 28(sp)
-; RV32IA-NEXT: lw a4, 24(sp)
-; RV32IA-NEXT: andi a0, a0, 255
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: beqz a0, .LBB44_7
-||||||| 04f65043bc87
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB44_7
-=======
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB44_7
->>>>>>> main
; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a4, s1, .LBB44_4
@@ -3663,23 +3427,6 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB44_1
; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
-=======
; RV32IA-NEXT: mv a0, a5
; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3687,31 +3434,28 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB44_2
; RV64I-NEXT: .LBB44_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV64I-NEXT: sd a3, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 16(sp)
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: beqz a0, .LBB44_4
+; RV64I-NEXT: ld a3, 0(sp)
+; RV64I-NEXT: bnez a0, .LBB44_4
; RV64I-NEXT: .LBB44_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3722,10 +3466,10 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB44_1
; RV64I-NEXT: .LBB44_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_min_i64_monotonic:
@@ -3739,11 +3483,11 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umax_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
@@ -3752,41 +3496,16 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB45_2
; RV32I-NEXT: .LBB45_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-<<<<<<< HEAD
-; RV32I-NEXT: sw a4, 24(sp)
-; RV32I-NEXT: sw a5, 28(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
-=======
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32I-NEXT: lw a5, 28(sp)
-; RV32I-NEXT: lw a4, 24(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB45_7
-||||||| 04f65043bc87
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB45_7
-=======
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB45_7
->>>>>>> main
; RV32I-NEXT: .LBB45_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a4, s1, .LBB45_4
@@ -3807,23 +3526,6 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB45_1
; RV32I-NEXT: .LBB45_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-=======
; RV32I-NEXT: mv a0, a5
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3831,16 +3533,15 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -48
-; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
@@ -3849,41 +3550,16 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB45_2
; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a4, 24(sp)
-; RV32IA-NEXT: sw a5, 28(sp)
-; RV32IA-NEXT: sw a3, 20(sp)
-; RV32IA-NEXT: sw a2, 16(sp)
-; RV32IA-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
-=======
; RV32IA-NEXT: sw a5, 8(sp)
; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32IA-NEXT: lw a5, 28(sp)
-; RV32IA-NEXT: lw a4, 24(sp)
-; RV32IA-NEXT: andi a0, a0, 255
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: beqz a0, .LBB45_7
-||||||| 04f65043bc87
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB45_7
-=======
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB45_7
->>>>>>> main
; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a4, s1, .LBB45_4
@@ -3904,23 +3580,6 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB45_1
; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
-=======
; RV32IA-NEXT: mv a0, a5
; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3928,31 +3587,28 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB45_2
; RV64I-NEXT: .LBB45_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV64I-NEXT: sd a3, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 16(sp)
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: beqz a0, .LBB45_4
+; RV64I-NEXT: ld a3, 0(sp)
+; RV64I-NEXT: bnez a0, .LBB45_4
; RV64I-NEXT: .LBB45_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -3963,10 +3619,10 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB45_1
; RV64I-NEXT: .LBB45_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umax_i64_monotonic:
@@ -3980,11 +3636,11 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-LABEL: atomicrmw_umin_i64_monotonic:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
@@ -3993,41 +3649,16 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: j .LBB46_2
; RV32I-NEXT: .LBB46_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-<<<<<<< HEAD
-; RV32I-NEXT: sw a4, 24(sp)
-; RV32I-NEXT: sw a5, 28(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: addi a1, sp, 8
-=======
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32I-NEXT: lw a5, 28(sp)
-; RV32I-NEXT: lw a4, 24(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a5, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB46_7
-||||||| 04f65043bc87
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB46_7
-=======
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB46_7
->>>>>>> main
; RV32I-NEXT: .LBB46_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a4, s1, .LBB46_4
@@ -4048,23 +3679,6 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB46_1
; RV32I-NEXT: .LBB46_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
-=======
; RV32I-NEXT: mv a0, a5
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -4072,16 +3686,15 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i64_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: addi sp, sp, -48
-; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
@@ -4090,41 +3703,16 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: j .LBB46_2
; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-<<<<<<< HEAD
-; RV32IA-NEXT: sw a4, 24(sp)
-; RV32IA-NEXT: sw a5, 28(sp)
-; RV32IA-NEXT: sw a3, 20(sp)
-; RV32IA-NEXT: sw a2, 16(sp)
-; RV32IA-NEXT: addi a1, sp, 24
-||||||| 04f65043bc87
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: addi a1, sp, 8
-=======
; RV32IA-NEXT: sw a5, 8(sp)
; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
->>>>>>> main
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-<<<<<<< HEAD
-; RV32IA-NEXT: lw a5, 28(sp)
-; RV32IA-NEXT: lw a4, 24(sp)
-; RV32IA-NEXT: andi a0, a0, 255
-; RV32IA-NEXT: sw a5, 12(sp)
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: beqz a0, .LBB46_7
-||||||| 04f65043bc87
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB46_7
-=======
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB46_7
->>>>>>> main
; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a4, s1, .LBB46_4
@@ -4145,23 +3733,6 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB46_1
; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end
-<<<<<<< HEAD
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 48
-||||||| 04f65043bc87
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
-; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT: addi sp, sp, 32
-=======
; RV32IA-NEXT: mv a0, a5
; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -4169,31 +3740,28 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: addi sp, sp, 32
->>>>>>> main
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i64_monotonic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB46_2
; RV64I-NEXT: .LBB46_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV64I-NEXT: sd a3, 16(sp)
-; RV64I-NEXT: sd a2, 8(sp)
-; RV64I-NEXT: addi a1, sp, 16
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_8
-; RV64I-NEXT: ld a3, 16(sp)
-; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: beqz a0, .LBB46_4
+; RV64I-NEXT: ld a3, 0(sp)
+; RV64I-NEXT: bnez a0, .LBB46_4
; RV64I-NEXT: .LBB46_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a3
@@ -4204,10 +3772,10 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV64I-NEXT: j .LBB46_1
; RV64I-NEXT: .LBB46_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umin_i64_monotonic:
@@ -4224,14 +3792,11 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a0, 11(sp)
-; RV32I-NEXT: sb a0, 9(sp)
-; RV32I-NEXT: lb a0, 9(sp)
+; RV32I-NEXT: lb a0, 11(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4267,14 +3832,11 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a0, 7(sp)
-; RV64I-NEXT: sb a0, 5(sp)
-; RV64I-NEXT: lb a0, 5(sp)
+; RV64I-NEXT: lb a0, 7(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -4315,15 +3877,10 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sb a1, 11(sp)
-; RV32I-NEXT: sb a2, 10(sp)
; RV32I-NEXT: addi a1, sp, 11
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_1
-; RV32I-NEXT: lbu a1, 11(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: sb a1, 9(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4359,14 +3916,10 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sb a1, 7(sp)
-; RV64I-NEXT: sb a2, 6(sp)
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_1
-; RV64I-NEXT: lbu a1, 7(sp)
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: sb a1, 5(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -4407,14 +3960,11 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
; RV32I-NEXT: lh a0, 10(sp)
-; RV32I-NEXT: sh a0, 6(sp)
-; RV32I-NEXT: lh a0, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4451,14 +4001,11 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
; RV64I-NEXT: lh a0, 6(sp)
-; RV64I-NEXT: sh a0, 2(sp)
-; RV64I-NEXT: lh a0, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -4500,15 +4047,10 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sh a1, 10(sp)
-; RV32I-NEXT: sh a2, 8(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_2
-; RV32I-NEXT: lh a1, 10(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: sh a1, 6(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4545,14 +4087,10 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sh a1, 6(sp)
-; RV64I-NEXT: sh a2, 4(sp)
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_2
-; RV64I-NEXT: lh a1, 6(sp)
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: sh a1, 2(sp)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -4594,13 +4132,11 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext %
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4625,19 +4161,16 @@ define signext i32 @cmpxchg_i32_monotonic_monotonic_val0(ptr %ptr, i32 signext %
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: lw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: lw a0, 4(sp)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-NOZACAS-LABEL: cmpxchg_i32_monotonic_monotonic_val0:
@@ -4668,15 +4201,10 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 8(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -4704,19 +4232,15 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
;
; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a1, 20(sp)
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: sw a1, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IA-NOZACAS-LABEL: cmpxchg_i32_monotonic_monotonic_val1:
@@ -5422,9 +4946,9 @@ merge:
define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_max_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB60_5
@@ -5433,17 +4957,14 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: j .LBB60_3
; RV32I-NEXT: .LBB60_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB60_3 Depth=1
-; RV32I-NEXT: sw a1, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: addi a1, sp, 20
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: addi a1, sp, 4
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: beqz a0, .LBB60_8
+; RV32I-NEXT: lw a1, 4(sp)
+; RV32I-NEXT: bnez a0, .LBB60_8
; RV32I-NEXT: .LBB60_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a1
@@ -5462,9 +4983,9 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB60_8: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_max_i32_monotonic_crossbb:
@@ -5500,15 +5021,13 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: .LBB60_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB60_3 Depth=1
; RV64I-NEXT: sw a1, 12(sp)
-; RV64I-NEXT: sw a2, 8(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a1, 12(sp)
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB60_8
+; RV64I-NEXT: bnez a0, .LBB60_8
; RV64I-NEXT: .LBB60_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: li a0, 1
@@ -5572,10 +5091,10 @@ declare i32 @llvm.smax.i32(i32, i32)
define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_min_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB61_5
@@ -5585,17 +5104,14 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: j .LBB61_3
; RV32I-NEXT: .LBB61_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB61_3 Depth=1
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: addi a1, sp, 16
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 16(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB61_8
+; RV32I-NEXT: lw a1, 0(sp)
+; RV32I-NEXT: bnez a0, .LBB61_8
; RV32I-NEXT: .LBB61_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a1
@@ -5614,10 +5130,10 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB61_8: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_min_i32_monotonic_crossbb:
@@ -5641,10 +5157,10 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
;
; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: beqz a1, .LBB61_5
@@ -5654,16 +5170,14 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: j .LBB61_3
; RV64I-NEXT: .LBB61_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB61_3 Depth=1
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a1, 20(sp)
-; RV64I-NEXT: sw a1, 12(sp)
-; RV64I-NEXT: beqz a0, .LBB61_8
+; RV64I-NEXT: lw a1, 4(sp)
+; RV64I-NEXT: bnez a0, .LBB61_8
; RV64I-NEXT: .LBB61_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a1
@@ -5682,10 +5196,10 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: sw a0, 0(s0)
; RV64I-NEXT: .LBB61_8: # %merge
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_min_i32_monotonic_crossbb:
@@ -5727,9 +5241,9 @@ declare i32 @llvm.smin.i32(i32, i32)
define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB62_3
@@ -5739,17 +5253,14 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: seqz a2, a1
; RV32I-NEXT: add a2, a1, a2
-; RV32I-NEXT: sw a1, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: addi a1, sp, 20
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: addi a1, sp, 4
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: bnez a0, .LBB62_2
+; RV32I-NEXT: lw a1, 4(sp)
+; RV32I-NEXT: beqz a0, .LBB62_2
; RV32I-NEXT: j .LBB62_4
; RV32I-NEXT: .LBB62_3: # %else
; RV32I-NEXT: lw a1, 0(s0)
@@ -5758,9 +5269,9 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB62_4: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umax_i32_monotonic_crossbb:
@@ -5793,15 +5304,13 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: .LBB62_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB62_3 Depth=1
; RV64I-NEXT: sw a1, 12(sp)
-; RV64I-NEXT: sw a2, 8(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a1, 12(sp)
-; RV64I-NEXT: sw a1, 4(sp)
-; RV64I-NEXT: beqz a0, .LBB62_6
+; RV64I-NEXT: bnez a0, .LBB62_6
; RV64I-NEXT: .LBB62_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: li a0, 1
@@ -5859,10 +5368,10 @@ declare i32 @llvm.umax.i32(i32, i32)
define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
; RV32I-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: andi a1, a1, 1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: beqz a1, .LBB63_5
@@ -5872,17 +5381,14 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: j .LBB63_3
; RV32I-NEXT: .LBB63_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB63_3 Depth=1
-; RV32I-NEXT: sw a1, 16(sp)
-; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: addi a1, sp, 16
+; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: call __atomic_compare_exchange_4
-; RV32I-NEXT: lw a1, 16(sp)
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: beqz a0, .LBB63_8
+; RV32I-NEXT: lw a1, 0(sp)
+; RV32I-NEXT: bnez a0, .LBB63_8
; RV32I-NEXT: .LBB63_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a2, a1
@@ -5902,10 +5408,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32I-NEXT: sw a0, 0(s0)
; RV32I-NEXT: .LBB63_8: # %merge
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
@@ -5930,10 +5436,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
;
; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: andi a1, a1, 1
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: beqz a1, .LBB63_5
@@ -5943,16 +5449,14 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: j .LBB63_3
; RV64I-NEXT: .LBB63_2: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB63_3 Depth=1
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a3, 0
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a1, 20(sp)
-; RV64I-NEXT: sw a1, 12(sp)
-; RV64I-NEXT: beqz a0, .LBB63_8
+; RV64I-NEXT: lw a1, 4(sp)
+; RV64I-NEXT: bnez a0, .LBB63_8
; RV64I-NEXT: .LBB63_3: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a2, a1
@@ -5972,10 +5476,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: sw a0, 0(s0)
; RV64I-NEXT: .LBB63_8: # %merge
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
@@ -6023,13 +5527,11 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a0, 8(sp)
-; RV32I-NEXT: sw a0, 0(sp)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
@@ -6072,18 +5574,16 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a3, .LBB64_2
; RV64I-NEXT: # %bb.1: # %then
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sw a1, 20(sp)
-; RV64I-NEXT: sw a2, 16(sp)
-; RV64I-NEXT: addi a1, sp, 20
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sw a1, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: call __atomic_compare_exchange_4
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: sw a0, 12(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: lw a0, 4(sp)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB64_2: # %else
; RV64I-NEXT: lw a0, 0(a0)
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index e62d4003fd9c92..0f9feeb17716af 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -117,7 +117,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: save %sp, -120, %sp
+; CHECK-NEXT: save %sp, -104, %sp
; CHECK-NEXT: .cfi_def_cfa_register %fp
; CHECK-NEXT: .cfi_window_save
; CHECK-NEXT: .cfi_register %o7, %i7
@@ -137,20 +137,17 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: cmp %g2, %i1
; CHECK-NEXT: move %icc, %g4, %i5
; CHECK-NEXT: cmp %i5, 0
-; CHECK-NEXT: movne %icc, 0, %o3
; CHECK-NEXT: movne %icc, 0, %o2
+; CHECK-NEXT: movne %icc, 0, %o3
; CHECK-NEXT: std %g2, [%fp+-8]
-; CHECK-NEXT: std %o2, [%fp+-16]
; CHECK-NEXT: mov %i0, %o0
; CHECK-NEXT: mov %i3, %o1
; CHECK-NEXT: mov %i4, %o4
; CHECK-NEXT: call __atomic_compare_exchange_8
; CHECK-NEXT: mov %i4, %o5
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: be %icc, .LBB3_1
; CHECK-NEXT: ldd [%fp+-8], %g2
-; CHECK-NEXT: and %o0, 255, %i5
-; CHECK-NEXT: cmp %i5, 0
-; CHECK-NEXT: bne %icc, .LBB3_1
-; CHECK-NEXT: std %g2, [%fp+-24]
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: mov %g2, %i0
; CHECK-NEXT: ret
@@ -281,7 +278,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i64:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: save %sp, -120, %sp
+; CHECK-NEXT: save %sp, -104, %sp
; CHECK-NEXT: .cfi_def_cfa_register %fp
; CHECK-NEXT: .cfi_window_save
; CHECK-NEXT: .cfi_register %o7, %i7
@@ -306,20 +303,17 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: move %icc, %l0, %g4
; CHECK-NEXT: or %i5, %g4, %i5
; CHECK-NEXT: cmp %i5, 0
-; CHECK-NEXT: movne %icc, %i2, %o3
; CHECK-NEXT: movne %icc, %i1, %o2
+; CHECK-NEXT: movne %icc, %i2, %o3
; CHECK-NEXT: std %g2, [%fp+-8]
-; CHECK-NEXT: std %o2, [%fp+-16]
; CHECK-NEXT: mov %i0, %o0
; CHECK-NEXT: mov %i3, %o1
; CHECK-NEXT: mov %i4, %o4
; CHECK-NEXT: call __atomic_compare_exchange_8
; CHECK-NEXT: mov %i4, %o5
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: be %icc, .LBB7_1
; CHECK-NEXT: ldd [%fp+-8], %g2
-; CHECK-NEXT: and %o0, 255, %i5
-; CHECK-NEXT: cmp %i5, 0
-; CHECK-NEXT: bne %icc, .LBB7_1
-; CHECK-NEXT: std %g2, [%fp+-24]
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: mov %g2, %i0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll
index 73d11a9e30f593..c648ecdfbe674b 100644
--- a/llvm/test/CodeGen/X86/atomic-xor.ll
+++ b/llvm/test/CodeGen/X86/atomic-xor.ll
@@ -26,7 +26,7 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $64, %esp
+; X86-NEXT: subl $48, %esp
; X86-NEXT: movl 12(%ebp), %edi
; X86-NEXT: movl 12(%edi), %ecx
; X86-NEXT: movl 8(%edi), %edx
@@ -40,10 +40,10 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: addl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -55,15 +55,11 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: calll __atomic_compare_exchange at PLT
; X86-NEXT: addl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl (%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: testb %al, %al
-; X86-NEXT: jne .LBB1_1
+; X86-NEXT: je .LBB1_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ebx, (%eax)
diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll
index 59d260bb543dae..8f4da356e06cbb 100644
--- a/llvm/test/CodeGen/X86/atomic64.ll
+++ b/llvm/test/CodeGen/X86/atomic64.ll
@@ -328,7 +328,7 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $88, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -359,16 +359,12 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB6_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB6_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -379,16 +375,12 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: jne .LBB6_1
+; I486-NEXT: je .LBB6_1
; I486-NEXT: jmp .LBB6_2
; I486-NEXT: .LBB6_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -428,7 +420,7 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $88, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -459,16 +451,12 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB7_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB7_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -479,16 +467,12 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: jne .LBB7_1
+; I486-NEXT: je .LBB7_1
; I486-NEXT: jmp .LBB7_2
; I486-NEXT: .LBB7_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -528,7 +512,7 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $88, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -559,16 +543,12 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB8_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB8_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -579,16 +559,12 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: jne .LBB8_1
+; I486-NEXT: je .LBB8_1
; I486-NEXT: jmp .LBB8_2
; I486-NEXT: .LBB8_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -628,7 +604,7 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $88, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl 8(%ebp), %eax
@@ -659,16 +635,12 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: .LBB9_4: # %atomicrmw.start
; I486-NEXT: # in Loop: Header=BB9_1 Depth=1
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; I486-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
; I486-NEXT: movl %edx, 12(%eax)
; I486-NEXT: movl %ecx, 8(%eax)
@@ -679,16 +651,12 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: movb %al, %dl
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: testb %dl, %dl
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: jne .LBB9_1
+; I486-NEXT: je .LBB9_1
; I486-NEXT: jmp .LBB9_2
; I486-NEXT: .LBB9_2: # %atomicrmw.end
; I486-NEXT: leal -4(%ebp), %esp
@@ -714,26 +682,18 @@ define void @atomic_fetch_cmpxchg64() nounwind {
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $48, %esp
-; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
+; I486-NEXT: subl $32, %esp
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
-; I486-NEXT: movl $1, {{[0-9]+}}(%esp)
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl {{[0-9]+}}(%esp), %edx
; I486-NEXT: movl %esp, %eax
-; I486-NEXT: movl %edx, 12(%eax)
-; I486-NEXT: movl %ecx, 8(%eax)
; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx
; I486-NEXT: movl %ecx, 4(%eax)
; I486-NEXT: movl $2, 20(%eax)
; I486-NEXT: movl $2, 16(%eax)
+; I486-NEXT: movl $0, 12(%eax)
+; I486-NEXT: movl $1, 8(%eax)
; I486-NEXT: movl $sc64, (%eax)
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I486-NEXT: movl %ebp, %esp
; I486-NEXT: popl %ebp
; I486-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/cmpxchg8b.ll b/llvm/test/CodeGen/X86/cmpxchg8b.ll
index a598608a7d37db..10e957015047b8 100644
--- a/llvm/test/CodeGen/X86/cmpxchg8b.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg8b.ll
@@ -33,12 +33,10 @@ define void @t1(ptr nocapture %p) nounwind ssp {
; I486-NEXT: pushl %ebp
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $24, %esp
+; I486-NEXT: subl $8, %esp
; I486-NEXT: movl 8(%ebp), %eax
; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
; I486-NEXT: movl $0, (%esp)
-; I486-NEXT: movl $0, {{[0-9]+}}(%esp)
-; I486-NEXT: movl $1, {{[0-9]+}}(%esp)
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: pushl $5
; I486-NEXT: pushl $5
@@ -48,10 +46,6 @@ define void @t1(ptr nocapture %p) nounwind ssp {
; I486-NEXT: pushl %eax
; I486-NEXT: calll __atomic_compare_exchange_8 at PLT
; I486-NEXT: addl $24, %esp
-; I486-NEXT: movl (%esp), %eax
-; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I486-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; I486-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; I486-NEXT: movl %ebp, %esp
; I486-NEXT: popl %ebp
; I486-NEXT: retl
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index bbb68129f6a16f..448d92e43a602c 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -55,7 +55,7 @@
## Yes, this means additions to TLI will fail this test, but the argument
## to -COUNT can't be an expression.
# AVAIL: TLI knows 528 symbols, 295 available
-# AVAIL-COUNT-289: {{^}} available
+# AVAIL-COUNT-295: {{^}} available
# AVAIL-NOT: {{^}} available
# UNAVAIL-COUNT-233: not available
# UNAVAIL-NOT: not available
>From 00499ce01862fa1f174b8467ee2304c1896c63cb Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 7 Nov 2024 18:59:33 +0100
Subject: [PATCH 05/17] Implement atomic load/write builtins / Use in
OMPIRBuilder
---
flang/lib/Semantics/CMakeLists.txt | 63 +-
.../llvm/Analysis/TargetLibraryInfo.def | 63 +-
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 7 +-
.../llvm/Transforms/Utils/BuildBuiltins.h | 291 +++---
.../llvm/Transforms/Utils/BuildLibCalls.h | 31 +-
llvm/lib/Analysis/TargetLibraryInfo.cpp | 22 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 314 +++----
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 835 ++++++++++++++++--
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 127 ++-
.../tools/llvm-tli-checker/ps4-tli-check.yaml | 34 +-
.../Analysis/TargetLibraryInfoTest.cpp | 11 +
.../Frontend/OpenMPIRBuilderTest.cpp | 10 +-
12 files changed, 1359 insertions(+), 449 deletions(-)
diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt
index 41406ecf50e004..bbd172c2e39060 100644
--- a/flang/lib/Semantics/CMakeLists.txt
+++ b/flang/lib/Semantics/CMakeLists.txt
@@ -1,10 +1,49 @@
-add_flang_library(FortranSemantics
+add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED
assignment.cpp
attr.cpp
canonicalize-acc.cpp
canonicalize-directives.cpp
canonicalize-do.cpp
canonicalize-omp.cpp
+ compute-offsets.cpp
+ data-to-inits.cpp
+ definable.cpp
+ expression.cpp
+ mod-file.cpp
+ pointer-assignment.cpp
+ program-tree.cpp
+ resolve-labels.cpp
+ resolve-directives.cpp
+ resolve-names-utils.cpp
+ resolve-names.cpp
+ rewrite-directives.cpp
+ rewrite-parse-tree.cpp
+ runtime-type-info.cpp
+ scope.cpp
+ semantics.cpp
+ symbol.cpp
+ tools.cpp
+ type.cpp
+ unparse-with-symbols.cpp
+
+ DEPENDS
+ acc_gen
+ omp_gen
+
+ LINK_LIBS
+ FortranCommon
+ FortranParser
+ FortranEvaluate
+
+ LINK_COMPONENTS
+ Support
+ FrontendOpenMP
+ FrontendOpenACC
+ TargetParser
+)
+
+
+add_flang_library(FortranSemanticsCheck PARTIAL_SOURCES_INTENDED
check-acc-structure.cpp
check-allocate.cpp
check-arithmeticif.cpp
@@ -26,26 +65,6 @@ add_flang_library(FortranSemantics
check-select-rank.cpp
check-select-type.cpp
check-stop.cpp
- compute-offsets.cpp
- data-to-inits.cpp
- definable.cpp
- expression.cpp
- mod-file.cpp
- pointer-assignment.cpp
- program-tree.cpp
- resolve-labels.cpp
- resolve-directives.cpp
- resolve-names-utils.cpp
- resolve-names.cpp
- rewrite-directives.cpp
- rewrite-parse-tree.cpp
- runtime-type-info.cpp
- scope.cpp
- semantics.cpp
- symbol.cpp
- tools.cpp
- type.cpp
- unparse-with-symbols.cpp
DEPENDS
acc_gen
@@ -62,3 +81,5 @@ add_flang_library(FortranSemantics
FrontendOpenACC
TargetParser
)
+
+target_link_libraries(FortranSemantics PUBLIC FortranSemanticsCheck)
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 0a2a89e2a68de8..760e932f3b3cad 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -462,38 +462,87 @@ TLI_DEFINE_ENUM_INTERNAL(atomic_load)
TLI_DEFINE_STRING_INTERNAL("__atomic_load")
TLI_DEFINE_SIG_INTERNAL(Void, SizeT, Ptr, Ptr, Int)
-/// void __atomic_store(size_t size, void *mptr, void *vptr, int smodel);
+/// int8_t __atomic_load_1(void *ptr, int memorder)
+TLI_DEFINE_ENUM_INTERNAL(atomic_load_1)
+TLI_DEFINE_STRING_INTERNAL("__atomic_load_1")
+TLI_DEFINE_SIG_INTERNAL(Int8, Ptr, Int)
+
+/// int16_t __atomic_load_2(void *ptr, int memorder)
+TLI_DEFINE_ENUM_INTERNAL(atomic_load_2)
+TLI_DEFINE_STRING_INTERNAL("__atomic_load_2")
+TLI_DEFINE_SIG_INTERNAL(Int16, Ptr, Int)
+
+/// int32_t __atomic_load_4(void *ptr, int memorder)
+TLI_DEFINE_ENUM_INTERNAL(atomic_load_4)
+TLI_DEFINE_STRING_INTERNAL("__atomic_load_4")
+TLI_DEFINE_SIG_INTERNAL(Int32, Ptr, Int)
+
+/// int64_t __atomic_load_8(void *ptr, int memorder)
+TLI_DEFINE_ENUM_INTERNAL(atomic_load_8)
+TLI_DEFINE_STRING_INTERNAL("__atomic_load_8")
+TLI_DEFINE_SIG_INTERNAL(Int64, Ptr, Int)
+
+/// int128_t __atomic_load_16(void *ptr, int memorder)
+TLI_DEFINE_ENUM_INTERNAL(atomic_load_16)
+TLI_DEFINE_STRING_INTERNAL("__atomic_load_16")
+TLI_DEFINE_SIG_INTERNAL(Int128, Ptr, Int)
+
+/// void __atomic_store(size_t size, void *ptr, void *val, int smodel)
TLI_DEFINE_ENUM_INTERNAL(atomic_store)
TLI_DEFINE_STRING_INTERNAL("__atomic_store")
TLI_DEFINE_SIG_INTERNAL(Void, SizeT, Ptr, Ptr, Int)
+/// void __atomic_store_1(void *ptr, int8_t val, int smodel)
+TLI_DEFINE_ENUM_INTERNAL(atomic_store_1)
+TLI_DEFINE_STRING_INTERNAL("__atomic_store_1")
+TLI_DEFINE_SIG_INTERNAL(Void, Ptr, Int8, Int)
-/// bool __atomic_compare_exchange(size_t size, void *obj, void *expected, void *desired, int success, int failure)
+/// void __atomic_store_2(void *ptr, int16_t val, int smodel)
+TLI_DEFINE_ENUM_INTERNAL(atomic_store_2)
+TLI_DEFINE_STRING_INTERNAL("__atomic_store_2")
+TLI_DEFINE_SIG_INTERNAL(Void, Ptr, Int16, Int)
+
+/// void __atomic_store_4(void *ptr, int32_t val, int smodel)
+TLI_DEFINE_ENUM_INTERNAL(atomic_store_4)
+TLI_DEFINE_STRING_INTERNAL("__atomic_store_4")
+TLI_DEFINE_SIG_INTERNAL(Void, Ptr, Int32, Int)
+
+/// void __atomic_store_8(void *ptr, int64_t val, int smodel)
+TLI_DEFINE_ENUM_INTERNAL(atomic_store_8)
+TLI_DEFINE_STRING_INTERNAL("__atomic_store_8")
+TLI_DEFINE_SIG_INTERNAL(Void, Ptr, Int64, Int)
+
+/// void __atomic_store_16(void *ptr, int128_t val, int smodel)
+TLI_DEFINE_ENUM_INTERNAL(atomic_store_16)
+TLI_DEFINE_STRING_INTERNAL("__atomic_store_16")
+TLI_DEFINE_SIG_INTERNAL(Void, Ptr, Int128, Int)
+
+/// bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void *desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange")
TLI_DEFINE_SIG_INTERNAL(Bool, SizeT, Ptr, Ptr, Ptr, Int, Int)
-/// bool __atomic_compare_exchange_1(void *obj, void *expected, uint8_t desired, int success, int failure)
+/// bool __atomic_compare_exchange_1(void *ptr, void *expected, uint8_t desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_1)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_1")
TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int8, Int, Int)
-/// bool __atomic_compare_exchange_2(void *obj, void *expected, uint16_t desired, int success, int failure)
+/// bool __atomic_compare_exchange_2(void *ptr, void *expected, uint16_t desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_2)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_2")
TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int16, Int, Int)
-/// bool __atomic_compare_exchange_4(void *obj, void *expected, uint32_t desired, int success, int failure)
+/// bool __atomic_compare_exchange_4(void *ptr, void *expected, uint32_t desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_4)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_4")
TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int32, Int, Int)
-/// bool __atomic_compare_exchange_8(void *obj, void *expected, uint64_t desired, int success, int failure)
+/// bool __atomic_compare_exchange_8(void *ptr, void *expected, uint64_t desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_8)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_8")
TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int64, Int, Int)
-/// bool __atomic_compare_exchange_16(void *obj, void *expected, uint128_t desired, int success, int failure)
+/// bool __atomic_compare_exchange_16(void *ptr, void *expected, uint128_t desired, int success, int failure)
TLI_DEFINE_ENUM_INTERNAL(atomic_compare_exchange_16)
TLI_DEFINE_STRING_INTERNAL("__atomic_compare_exchange_16")
TLI_DEFINE_SIG_INTERNAL(Bool, Ptr, Ptr, Int128, Int, Int)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 3afb9d84278e81..ce6dd56f52a631 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3064,7 +3064,6 @@ class OpenMPIRBuilder {
/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
- /// Only Scalar data types.
///
/// \param AllocaIP The insertion point to be used for alloca
/// instructions.
@@ -3116,7 +3115,7 @@ class OpenMPIRBuilder {
bool IsVolatile = false;
};
- /// Emit atomic Read for : V = X --- Only Scalar data types.
+ /// Emit atomic Read for : V = X.
///
/// \param Loc The insert and source location description.
/// \param X The target pointer to be atomically read
@@ -3140,8 +3139,8 @@ class OpenMPIRBuilder {
///
/// \return Insertion point after generated atomic Write IR.
InsertPointTy createAtomicWrite(const LocationDescription &Loc,
- AtomicOpValue &X, Value *Expr,
- AtomicOrdering AO);
+ InsertPointTy AllocaIP, AtomicOpValue &X,
+ Value *Expr, AtomicOrdering AO);
/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index 7c541a68859bc0..f9590868792981 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -1,129 +1,162 @@
-//===- BuildBuiltins.h - Utility builder for builtins ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements some functions for lowering compiler builtins.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H
-#define LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/Alignment.h"
-#include "llvm/Support/AtomicOrdering.h"
-#include <cstdint>
-#include <variant>
-
-namespace llvm {
-class Value;
-class TargetLibraryInfo;
-class DataLayout;
-class IRBuilderBase;
-class Type;
-class TargetLowering;
-
-namespace SyncScope {
-typedef uint8_t ID;
-}
-
-/// Emit a call to the __atomic_compare_exchange builtin. This may either be
-/// lowered to the cmpxchg LLVM instruction, or to one of the following libcall
-/// functions: __atomic_compare_exchange_1, __atomic_compare_exchange_2,
-/// __atomic_compare_exchange_4, __atomic_compare_exchange_8,
-/// __atomic_compare_exchange_16, __atomic_compare_exchange.
-///
-/// Also see:
-/// https://llvm.org/docs/Atomics.html
-/// https://llvm.org/docs/LangRef.html#cmpxchg-instruction
-/// https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
-/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
-///
-/// @param Ptr The memory location accessed atomically.
-/// @Param ExpectedPtr Pointer to the data expected at /p Ptr. The exchange will
-/// only happen if the value at \p Ptr is equal to this. Data
-/// at \p ExpectedPtr may or may not be be overwritten, so do
-/// not use after this call.
-/// @Param DesiredPtr Pointer to the data that the data at /p Ptr is replaced
-/// with.
-/// @param IsWeak If true, the exchange may not happen even if the data at
-/// \p Ptr equals to \p ExpectedPtr.
-/// @param IsVolatile Whether to mark the access as volatile.
-/// @param SuccessMemorder If the exchange succeeds, memory is affected
-/// according to the memory model.
-/// @param FailureMemorder If the exchange fails, memory is affected according
-/// to the memory model. It is considered an atomic "read"
-/// for the purpose of identifying release sequences. Must
-/// not be release, acquire-release, and at most as strong as
-/// \p SuccessMemorder.
-/// @param Scope (optional) The synchronization scope (domain of threads
-/// where this access has to be atomic, e.g. CUDA
-/// warp/block/grid-level atomics) of this access. Defaults
-/// to system scope.
-/// @param DataTy (optional) Type of the value to be accessed. cmpxchg
-/// supports integer and pointers only. If any other type or
-/// omitted, type-prunes to an integer the holds at least \p
-/// DataSize bytes.
-/// @param PrevPtr (optional) The value that /p Ptr had before the exchange
-/// is stored here.
-/// @param DataSize Number of bytes to be exchanged.
-/// @param AvailableSize The total size that can be used for the atomic
-/// operation. It may include trailing padding in addition to
-/// the data type's size to allow the use power-of-two
-/// instructions/calls.
-/// @param Align (optional) Known alignment of /p Ptr. If omitted,
-/// alignment is inferred from /p Ptr itself and falls back
-/// to no alignment.
-/// @param Builder User to emit instructions.
-/// @param DL The target's data layout.
-/// @param TLI The target's libcall library availability.
-/// @param TL (optional) Used to determine which instructions the
-/// target support. If omitted, assumes all accesses up to a
-/// size of 16 bytes are supported.
-/// @param SyncScopes Available scopes for the target. Only needed if /p Scope
-/// is not a constant.
-/// @param FallbackScope Fallback scope if /p Scope is not an available scope.
-/// @param AllowInstruction Whether a 'cmpxchg' can be emitted. False is used by
-/// AtomicExpandPass that replaces cmpxchg instructions not
-/// supported by the target.
-/// @param AllowSwitch If one of IsWeak,SuccessMemorder,FailureMemorder,Scope is
-/// not a constant, allow emitting a switch for each possible
-/// value since cmpxchg only allows constant arguments for
-/// these.
-/// @param AllowSizedLibcall Allow emitting calls to __atomic_compare_exchange_n
-/// libcall functions.
-///
-/// @return A boolean value that indicates whether the exchange has happened
-/// (true) or not (false).
-Value *emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
- std::variant<Value *, bool> IsWeak, bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
- std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
- Type *DataTy, std::optional<uint64_t> DataSize,
- std::optional<uint64_t> AvailableSize, MaybeAlign Align,
- IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const TargetLowering *TL,
- ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
- StringRef FallbackScope, bool AllowInstruction = true,
- bool AllowSwitch = true, bool AllowSizedLibcall = true);
-
-Value *emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
- std::variant<Value *, bool> Weak, bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
- Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
- std::optional<uint64_t> AvailableSize, MaybeAlign Align,
- IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const TargetLowering *TL, bool AllowInstruction = true,
- bool AllowSwitch = true, bool AllowSizedLibcall = true);
-
-} // namespace llvm
-
-#endif /* LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H */
+//===- BuildBuiltins.h - Utility builder for builtins ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions for lowering compiler builtins.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H
+#define LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include <cstdint>
+#include <variant>
+
+namespace llvm {
+class Value;
+class TargetLibraryInfo;
+class DataLayout;
+class IRBuilderBase;
+class Type;
+class TargetLowering;
+
+namespace SyncScope {
+typedef uint8_t ID;
+}
+
+void emitAtomicLoadBuiltin(
+ Value *Ptr, Value *RetPtr,
+ // std::variant<Value *, bool> IsWeak,
+ bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name = Twine(),
+ bool AllowInstruction = true, bool AllowSwitch = true,
+ bool AllowSizedLibcall = true, bool AllowLibcall = true);
+
+void emitAtomicStoreBuiltin(
+ Value *Ptr, Value *ValPtr,
+ // std::variant<Value *, bool> IsWeak,
+ bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name = Twine(),
+ bool AllowInstruction = true, bool AllowSwitch = true,
+ bool AllowSizedLibcall = true, bool AllowLibcall = true);
+
+/// Emit a call to the __atomic_compare_exchange builtin. This may either be
+/// lowered to the cmpxchg LLVM instruction, or to one of the following libcall
+/// functions: __atomic_compare_exchange_1, __atomic_compare_exchange_2,
+/// __atomic_compare_exchange_4, __atomic_compare_exchange_8,
+/// __atomic_compare_exchange_16, __atomic_compare_exchange.
+///
+/// Also see:
+/// https://llvm.org/docs/Atomics.html
+/// https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+/// https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param Ptr The memory location accessed atomically.
+/// @param ExpectedPtr Pointer to the data expected at \p Ptr. The exchange will
+/// only happen if the value at \p Ptr is equal to this. Data
+/// at \p ExpectedPtr may or may not be overwritten, so do
+/// not use after this call.
+/// @param DesiredPtr Pointer to the data that the data at \p Ptr is replaced
+/// with.
+/// @param IsWeak If true, the exchange may not happen even if the data at
+/// \p Ptr equals to \p ExpectedPtr.
+/// @param IsVolatile Whether to mark the access as volatile.
+/// @param SuccessMemorder If the exchange succeeds, memory is affected
+/// according to the memory model.
+/// @param FailureMemorder If the exchange fails, memory is affected according
+/// to the memory model. It is considered an atomic "read"
+/// for the purpose of identifying release sequences. Must
+/// not be release, acquire-release, and at most as strong as
+/// \p SuccessMemorder.
+/// @param Scope (optional) The synchronization scope (domain of threads
+/// where this access has to be atomic, e.g. CUDA
+/// warp/block/grid-level atomics) of this access. Defaults
+/// to system scope.
+/// @param DataTy (optional) Type of the value to be accessed. cmpxchg
+/// supports integer and pointers only. If any other type or
+/// omitted, type-prunes to an integer that holds at least \p
+/// DataSize bytes.
+/// @param PrevPtr (optional) The value that \p Ptr had before the exchange
+/// is stored here.
+/// @param DataSize Number of bytes to be exchanged.
+/// @param AvailableSize The total size that can be used for the atomic
+/// operation. It may include trailing padding in addition to
+/// the data type's size to allow the use power-of-two
+/// instructions/calls.
+/// @param Align (optional) Known alignment of \p Ptr. If omitted,
+/// alignment is inferred from \p Ptr itself and falls back
+/// to no alignment.
+/// @param Builder Used to emit instructions.
+/// @param DL The target's data layout.
+/// @param TLI The target's libcall library availability.
+/// @param TL (optional) Used to determine which instructions the
+/// target supports. If omitted, assumes all accesses up to a
+/// size of 16 bytes are supported.
+/// @param SyncScopes Available scopes for the target. Only needed if \p Scope
+/// is not a constant.
+/// @param FallbackScope Fallback scope if \p Scope is not an available scope.
+/// @param AllowInstruction Whether a 'cmpxchg' can be emitted. False is used by
+/// AtomicExpandPass that replaces cmpxchg instructions not
+/// supported by the target.
+/// @param AllowSwitch If one of IsWeak,SuccessMemorder,FailureMemorder,Scope is
+/// not a constant, allow emitting a switch for each possible
+/// value since cmpxchg only allows constant arguments for
+/// these.
+/// @param AllowSizedLibcall Allow emitting calls to __atomic_compare_exchange_n
+/// libcall functions.
+///
+/// @return A boolean value that indicates whether the exchange has happened
+/// (true) or not (false).
+Value *emitAtomicCompareExchangeBuiltin(
+ Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ std::variant<Value *, bool> IsWeak, bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
+ Type *DataTy, std::optional<uint64_t> DataSize,
+ std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+ IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name = Twine(),
+ bool AllowInstruction = true, bool AllowSwitch = true,
+ bool AllowSizedLibcall = true, bool AllowLibcall = true);
+
+Value *emitAtomicCompareExchangeBuiltin(
+ Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ std::variant<Value *, bool> IsWeak, bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder,
+ Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
+ std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+ IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const TargetLowering *TL, llvm::Twine Name = Twine(),
+ bool AllowInstruction = true, bool AllowSwitch = true,
+ bool AllowSizedLibcall = true, bool AllowLibcall = true);
+
+} // namespace llvm
+
+#endif /* LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H */
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 85b50eed5e416b..d419e6c775d9f0 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -192,10 +192,37 @@ namespace llvm {
Value *emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, IRBuilderBase &B,
const TargetLibraryInfo *TLI);
+ /// Emit a call to the __atomic_load function.
+ /// Defined here:
+ /// https://llvm.org/docs/Atomics.html#libcalls-atomic
+ /// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#list_of_library_routines
+ Value *emitAtomicLoad(Value *Size, Value *Ptr, Value *Ret, Value *Memorder,
+ IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
+ /// Variant of __atomic_load where \p Size is either 1, 2, 4, 8, or 16.
+ Value *emitAtomicLoadN(size_t Size, Value *Ptr, Value *Memorder,
+ IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the __atomic_store function.
+ /// Defined here:
+ /// https://llvm.org/docs/Atomics.html#libcalls-atomic
+ /// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#list_of_library_routines
+ Value *emitAtomicStore(Value *Size, Value *Ptr, Value *ValPtr,
+ Value *Memorder, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI);
+
+ /// Variant of __atomic_store where \p Size is either 1, 2, 4, 8, or 16.
+ Value *emitAtomicStoreN(size_t Size, Value *Ptr, Value *Val, Value *Memorder,
+ IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
/// Emit a call to the __atomic_compare_exchange function.
- /// Defined here: https://llvm.org/docs/Atomics.html#libcalls-atomic,
+ /// Defined here:
+ /// https://llvm.org/docs/Atomics.html#libcalls-atomic
/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#list_of_library_routines
-///
+ ///
/// NOTE: Signature is different to the builtins defined here:
/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
Value *emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index cf1a8477685608..27a7ff9ef55352 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -68,9 +68,9 @@ enum FuncArgTypeID : char {
Long, // Either 32 or 64 bits.
IntX, // Any integer type.
Int64,
- LLong, // 64 bits on all targets.
- SizeT, // size_t.
- SSizeT, // POSIX ssize_t.
+ LLong, // 64 bits on all targets.
+ SizeT, // size_t.
+ SSizeT, // POSIX ssize_t.
Int128,
Flt, // IEEE float.
Dbl, // IEEE double.
@@ -830,7 +830,23 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
// Miscellaneous other functions not provided.
TLI.setUnavailable(LibFunc_atomic_load);
+ TLI.setUnavailable(LibFunc_atomic_load_1);
+ TLI.setUnavailable(LibFunc_atomic_load_2);
+ TLI.setUnavailable(LibFunc_atomic_load_4);
+ TLI.setUnavailable(LibFunc_atomic_load_8);
+ TLI.setUnavailable(LibFunc_atomic_load_16);
TLI.setUnavailable(LibFunc_atomic_store);
+ TLI.setUnavailable(LibFunc_atomic_store_1);
+ TLI.setUnavailable(LibFunc_atomic_store_2);
+ TLI.setUnavailable(LibFunc_atomic_store_4);
+ TLI.setUnavailable(LibFunc_atomic_store_8);
+ TLI.setUnavailable(LibFunc_atomic_store_16);
+ TLI.setUnavailable(LibFunc_atomic_compare_exchange);
+ TLI.setUnavailable(LibFunc_atomic_compare_exchange_1);
+ TLI.setUnavailable(LibFunc_atomic_compare_exchange_2);
+ TLI.setUnavailable(LibFunc_atomic_compare_exchange_4);
+ TLI.setUnavailable(LibFunc_atomic_compare_exchange_8);
+ TLI.setUnavailable(LibFunc_atomic_compare_exchange_16);
TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
TLI.setUnavailable(LibFunc___kmpc_free_shared);
TLI.setUnavailable(LibFunc_dunder_strndup);
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index d2e4dc1c85dfd2..17cbed65c04c3f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -52,6 +52,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildBuiltins.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
@@ -8036,54 +8037,42 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
assert(X.Var->getType()->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
+ assert(V.Var->getType()->isPointerTy() &&
+ "OMP Atomic expects a pointer for atomic load result");
Type *XElemTy = X.ElemTy;
- assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
- XElemTy->isPointerTy() || XElemTy->isStructTy()) &&
- "OMP atomic read expected a scalar type");
-
- Value *XRead = nullptr;
-
- if (XElemTy->isIntegerTy()) {
- LoadInst *XLD =
- Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
- XLD->setAtomic(AO);
- XRead = cast<Value>(XLD);
- } else if (XElemTy->isStructTy()) {
- // FIXME: Add checks to ensure __atomic_load is emitted iff the
- // target does not support `atomicrmw` of the size of the struct
- LoadInst *OldVal = Builder.CreateLoad(XElemTy, X.Var, "omp.atomic.read");
- OldVal->setAtomic(AO);
- const DataLayout &LoadDL = OldVal->getModule()->getDataLayout();
- unsigned LoadSize =
- LoadDL.getTypeStoreSize(OldVal->getPointerOperand()->getType());
- OpenMPIRBuilder::AtomicInfo atomicInfo(
- &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->getAlign(),
- OldVal->getAlign(), true /* UseLibcall */, X.Var);
- auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
- XRead = AtomicLoadRes.first;
- OldVal->eraseFromParent();
- } else {
- // We need to perform atomic op as integer
- IntegerType *IntCastTy =
- IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
- LoadInst *XLoad =
- Builder.CreateLoad(IntCastTy, X.Var, X.IsVolatile, "omp.atomic.load");
- XLoad->setAtomic(AO);
- if (XElemTy->isFloatingPointTy()) {
- XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
- } else {
- XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
- }
- }
+
+ Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
+ TargetLibraryInfoImpl TLII(T);
+ TargetLibraryInfo TLI(TLII);
+ const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+ Twine Name(X.Var->getName());
+
+ emitAtomicLoadBuiltin(X.Var,
+ /*RetPtr=*/V.Var,
+ /*IsVolatile=*/X.IsVolatile || V.IsVolatile,
+ /*Memorder=*/AO,
+ /*SyncScope=*/SyncScope::System,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/StringRef(),
+ /*Name=*/Name + ".atomic.read");
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
- Builder.CreateStore(XRead, V.Var, V.IsVolatile);
+
+ // LoadInst *LoadedVal= Builder.CreateLoad(XElemTy, X.Var, Name );
return Builder.saveIP();
}
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
- AtomicOpValue &X, Value *Expr,
- AtomicOrdering AO) {
+ InsertPointTy AllocaIP, AtomicOpValue &X,
+ Value *Expr, AtomicOrdering AO) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -8094,18 +8083,35 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
XElemTy->isPointerTy()) &&
"OMP atomic write expected a scalar type");
- if (XElemTy->isIntegerTy()) {
- StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
- XSt->setAtomic(AO);
- } else {
- // We need to bitcast and perform atomic op as integers
- IntegerType *IntCastTy =
- IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
- Value *ExprCast =
- Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
- StoreInst *XSt = Builder.CreateStore(ExprCast, X.Var, X.IsVolatile);
- XSt->setAtomic(AO);
- }
+ Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
+ TargetLibraryInfoImpl TLII(T);
+ TargetLibraryInfo TLI(TLII);
+ const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+ Twine Name(X.Var->getName());
+
+ // Reserve some stack space.
+ auto ContIP = Builder.saveIP();
+ Builder.restoreIP(AllocaIP);
+ auto ValPtr = Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.val");
+ Builder.restoreIP(ContIP);
+
+ Builder.CreateStore(Expr, ValPtr);
+ emitAtomicStoreBuiltin(X.Var,
+ /*ValPtr=*/ValPtr,
+ /*IsVolatile=*/X.IsVolatile,
+ /*Memorder=*/AO,
+ /*SyncScope=*/SyncScope::System,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/StringRef(),
+ /*Name=*/Name + ".atomic.write");
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
return Builder.saveIP();
@@ -8180,8 +8186,8 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
- // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
- // or a complex datatype.
+ assert(XElemTy);
+
bool emitRMWOp = false;
switch (RMWOp) {
case AtomicRMWInst::Add:
@@ -8193,7 +8199,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
emitRMWOp = XElemTy;
break;
case AtomicRMWInst::Sub:
- emitRMWOp = (IsXBinopExpr && XElemTy);
+ emitRMWOp = IsXBinopExpr;
break;
default:
emitRMWOp = false;
@@ -8210,124 +8216,88 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
Res.second = Res.first;
else
Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
- } else if (RMWOp == llvm::AtomicRMWInst::BinOp::BAD_BINOP &&
- XElemTy->isStructTy()) {
- LoadInst *OldVal =
- Builder.CreateLoad(XElemTy, X, X->getName() + ".atomic.load");
- OldVal->setAtomic(AO);
- const DataLayout &LoadDL = OldVal->getModule()->getDataLayout();
- unsigned LoadSize =
- LoadDL.getTypeStoreSize(OldVal->getPointerOperand()->getType());
-
- OpenMPIRBuilder::AtomicInfo atomicInfo(
- &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->getAlign(),
- OldVal->getAlign(), true /* UseLibcall */, X);
- auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
- BasicBlock *CurBB = Builder.GetInsertBlock();
- Instruction *CurBBTI = CurBB->getTerminator();
- CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
- BasicBlock *ExitBB =
- CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
- BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
- X->getName() + ".atomic.cont");
- ContBB->getTerminator()->eraseFromParent();
- Builder.restoreIP(AllocaIP);
- AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
- NewAtomicAddr->setName(X->getName() + "x.new.val");
- Builder.SetInsertPoint(ContBB);
- llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
- PHI->addIncoming(AtomicLoadRes.first, CurBB);
- Value *OldExprVal = PHI;
- Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
- if (!CBResult)
- return CBResult.takeError();
- Value *Upd = *CBResult;
- Builder.CreateStore(Upd, NewAtomicAddr);
- AtomicOrdering Failure =
- llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
- auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
- AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
- LoadInst *PHILoad = Builder.CreateLoad(XElemTy, Result.first);
- PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
- Builder.CreateCondBr(Result.second, ExitBB, ContBB);
- OldVal->eraseFromParent();
- Res.first = OldExprVal;
- Res.second = Upd;
-
- if (UnreachableInst *ExitTI =
- dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
- CurBBTI->eraseFromParent();
- Builder.SetInsertPoint(ExitBB);
- } else {
- Builder.SetInsertPoint(ExitTI);
- }
- } else {
- IntegerType *IntCastTy =
- IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
- LoadInst *OldVal =
- Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
- OldVal->setAtomic(AO);
- // CurBB
- // | /---\
- // ContBB |
- // | \---/
- // ExitBB
- BasicBlock *CurBB = Builder.GetInsertBlock();
- Instruction *CurBBTI = CurBB->getTerminator();
- CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
- BasicBlock *ExitBB =
- CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
- BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
- X->getName() + ".atomic.cont");
- ContBB->getTerminator()->eraseFromParent();
- Builder.restoreIP(AllocaIP);
- AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
- NewAtomicAddr->setName(X->getName() + "x.new.val");
- Builder.SetInsertPoint(ContBB);
- llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
- PHI->addIncoming(OldVal, CurBB);
- bool IsIntTy = XElemTy->isIntegerTy();
- Value *OldExprVal = PHI;
- if (!IsIntTy) {
- if (XElemTy->isFloatingPointTy()) {
- OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
- X->getName() + ".atomic.fltCast");
- } else {
- OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
- X->getName() + ".atomic.ptrCast");
- }
- }
-
- Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
- if (!CBResult)
- return CBResult.takeError();
- Value *Upd = *CBResult;
- Builder.CreateStore(Upd, NewAtomicAddr);
- LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
- AtomicOrdering Failure =
- llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
- AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
- X, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
- Result->setVolatile(VolatileX);
- Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
- Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
- PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
- Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
-
- Res.first = OldExprVal;
- Res.second = Upd;
-
- // set Insertion point in exit block
- if (UnreachableInst *ExitTI =
- dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
- CurBBTI->eraseFromParent();
- Builder.SetInsertPoint(ExitBB);
- } else {
- Builder.SetInsertPoint(ExitTI);
- }
+ return Res;
}
- return Res;
+ Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
+ TargetLibraryInfoImpl TLII(T);
+ TargetLibraryInfo TLI(TLII);
+ const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+ Twine Name(X->getName());
+
+ // Create new CFG.
+ BasicBlock *ContBB = splitBB(Builder, true, X->getName() + ".atomic.cont");
+ BasicBlock *ExitBB = splitBB(Builder, false, X->getName() + ".atomic.exit");
+ auto ContIP = Builder.saveIP();
+
+ // Reserve some stack space
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *OrigPtr =
+ Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.orig.ptr");
+ AllocaInst *UpdPtr =
+ Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.upd.ptr");
+ AllocaInst *PrevPtr =
+ Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.upd.prev");
+
+ // Emit the update transaction.
+ Builder.SetInsertPoint(ContBB);
+
+ // 1. Get original value.
+ emitAtomicLoadBuiltin(X,
+ /*RetPtr=*/OrigPtr,
+ /*IsVolatile=*/false,
+ /*Memorder=*/AO,
+ /*SyncScope=*/SyncScope::System,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/StringRef(),
+ /*Name=*/Name);
+
+ // 2. Let the user code compute the new value.
+ // FIXME: This should not be done by-value, as the type might be unreasonable
+ // large (e.g. i4096) and LLVM does not scale will with such large types.
+ Value *OrigVal = Builder.CreateLoad(XElemTy, OrigPtr, Name + ".atomic.orig");
+ Expected<Value *> CBResult = UpdateOp(OrigVal, Builder);
+ if (!CBResult)
+ return CBResult.takeError();
+ Value *UpdVal = *CBResult;
+ Builder.CreateStore(UpdVal, UpdPtr);
+
+ // 3. AtomicCompareExchange to replace OrigVal with UpdVal.
+ Value *Success = emitAtomicCompareExchangeBuiltin(
+ /*Ptr=*/X,
+ /*ExpectedPtr=*/OrigPtr,
+ /*DesiredPtr=*/UpdPtr,
+ /*IsWeak=*/true,
+ /*IsVolatile=*/false,
+ /*SuccessMemorder=*/AO,
+ /*FailureMemorder=*/{},
+ /*PrevPtr=*/PrevPtr,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*Name=*/Name);
+
+ // 4. Repeat transaction until successful.
+ Builder.CreateCondBr(Success, ExitBB, ContBB);
+
+ // Continue when the update transaction was successful.
+ Builder.restoreIP(ContIP);
+ Value *PrevVal = Builder.CreateLoad(XElemTy, PrevPtr, Name + ".atomic.prev");
+
+ return std::make_pair(OrigVal, PrevVal);
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 245fc398186e3c..43f21bdec38ff7 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -19,6 +19,7 @@
using namespace llvm;
+namespace {
static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
return B.getIntNTy(TLI->getIntSize());
}
@@ -48,19 +49,674 @@ static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
Size <= LargestSize;
}
// Trait that evaluates to true iff T is one of the alternative types of the
// given std::variant instantiation.
template <typename T, typename Variant> struct is_in_variant;

template <typename T, typename... Alts>
struct is_in_variant<T, std::variant<Alts...>>
    : std::disjunction<std::is_same<T, Alts>...> {};

/// Alternative to std::holds_alternative that works even if the std::variant
/// cannot hold T.
template <typename T, typename Variant>
constexpr bool holds_alternative_if_exists(const Variant &Var) {
  if constexpr (is_in_variant<T, Variant>::value)
    return std::holds_alternative<T>(Var);
  else
    return false; // T is not an alternative of this variant at all.
}
+
+} // namespace
+
+void llvm::emitAtomicLoadBuiltin(
+ Value *Ptr, Value *RetPtr,
+ // std::variant<Value *, bool> IsWeak,
+ bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
+ assert(Ptr->getType()->isPointerTy());
+ assert(RetPtr->getType()->isPointerTy());
+ assert(TLI);
+
+ LLVMContext &Ctx = Builder.getContext();
+ Function *CurFn = Builder.GetInsertBlock()->getParent();
+
+ unsigned MaxAtomicSizeSupported = 16;
+ if (TL)
+ MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
+
+ uint64_t DataSizeConst;
+ if (DataSize) {
+ DataSizeConst = *DataSize;
+ } else {
+ TypeSize DS = DL.getTypeStoreSize(DataTy);
+ DataSizeConst = DS.getFixedValue();
+ }
+ uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
+ assert(DataSizeConst <= AvailableSizeConst);
+
+#ifndef NDEBUG
+ if (DataTy) {
+ // 'long double' (80-bit extended precision) behaves strange here.
+ // DL.getTypeStoreSize says it is 10 bytes
+ // Clang says it is 12 bytes
+ // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that does
+ // not support floats, so AtomicExpandPass doesn't even know it originally
+ // was an FP80)
+ TypeSize DS = DL.getTypeStoreSize(DataTy);
+ assert(DS.getKnownMinValue() <= DataSizeConst &&
+ "Must access at least all the relevant bits of the data, possibly "
+ "some more for padding");
+ }
+#endif
+
+ Type *BoolTy = Builder.getInt1Ty();
+ Type *IntTy = getIntTy(Builder, TLI);
+
+ uint64_t PreferredSize = PowerOf2Ceil(DataSizeConst);
+ if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
+ PreferredSize = DataSizeConst;
+
+ llvm::Align EffectiveAlign;
+ if (Align) {
+ EffectiveAlign = *Align;
+ } else {
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // The alignment is only optional when parsing textual IR; for in-memory
+ // IR, it is always present. If unspecified, the alignment is assumed to
+ // be equal to the size of the ‘<value>’ type.
+ //
+ // We prefer safety here and assume no alignment, unless
+ // getPointerAlignment() can determine the actual alignment.
+ EffectiveAlign = Ptr->getPointerAlignment(DL);
+ }
+
+ // Only use the original data type if it is compatible with cmpxchg (and sized
+ // libcall function) and matches the preferred size. No type punning needed
+ // for __atomic_compare_exchange which only takes pointers.
+ Type *CoercedTy = nullptr;
+ if (DataTy && DataSizeConst == PreferredSize &&
+ (DataTy->isIntegerTy() || DataTy->isPointerTy()))
+ CoercedTy = DataTy;
+ else if (PreferredSize <= 16)
+ CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+
+ // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
+ // constant, determine the AtomicOrdering for use with the cmpxchg
+ // instruction. Also determines the llvm::Value to be passed to
+ // __atomic_compare_exchange in case cmpxchg is not legal.
+ auto processMemorder = [&](auto MemorderVariant)
+ -> std::pair<std::optional<AtomicOrdering>, Value *> {
+ if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
+ auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
+ return std::make_pair(
+ Memorder,
+ ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
+ }
+
+ if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
+ auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
+ return std::make_pair(
+ fromCABI(MemorderCABI),
+ ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
+ }
+
+ auto *MemorderCABI = std::get<Value *>(MemorderVariant);
+ if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
+ uint64_t MOInt = MO->getZExtValue();
+ return std::make_pair(fromCABI(MOInt), MO);
+ }
+
+ return std::make_pair(std::nullopt, MemorderCABI);
+ };
+
+ auto processScope = [&](auto ScopeVariant)
+ -> std::pair<std::optional<SyncScope::ID>, Value *> {
+ if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
+ auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
+ return std::make_pair(ScopeID, nullptr);
+ }
+
+ if (std::holds_alternative<StringRef>(ScopeVariant)) {
+ auto ScopeName = std::get<StringRef>(ScopeVariant);
+ SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
+ return std::make_pair(ScopeID, nullptr);
+ }
+
+ auto *IntVal = std::get<Value *>(ScopeVariant);
+ if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
+ uint64_t ScopeVal = InstConst->getZExtValue();
+ return std::make_pair(ScopeVal, IntVal);
+ }
+
+ return std::make_pair(std::nullopt, IntVal);
+ };
+
+ // auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
+ auto [MemorderConst, MemorderCABI] = processMemorder(Memorder);
+ auto [ScopeConst, ScopeVal] = processScope(Scope);
+
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
+ // a power of two greater than or equal to eight and less than or equal to a
+ // target-specific size limit.
+ bool CanUseAtomicLoadInst = PreferredSize <= MaxAtomicSizeSupported &&
+ llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
+ bool CanUseSingleAtomicLoadInst = CanUseAtomicLoadInst &&
+ MemorderConst.has_value() // && IsWeakConst
+ && ScopeConst;
+ bool CanUseSizedLibcall =
+ canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
+ ScopeConst == SyncScope::System;
+ bool CanUseLibcall = ScopeConst == SyncScope::System;
+
+ Value *ExpectedVal;
+ Value *DesiredVal;
+
+ // Emit load instruction, either as a single instruction, or as a case of a
+ // per-constant switch.
+ auto EmitAtomicLoadInst = [&](SyncScope::ID Scope, AtomicOrdering Memorder) {
+ LoadInst *AtomicInst =
+ Builder.CreateLoad(CoercedTy, Ptr, IsVolatile, Name + ".atomic.load");
+ AtomicInst->setAtomic(Memorder, Scope);
+ AtomicInst->setAlignment(EffectiveAlign);
+ AtomicInst->setVolatile(IsVolatile);
+
+ // Store loaded result to where the caller expects it.
+ // FIXME: Do we need to zero the padding, if any?
+ Builder.CreateStore(AtomicInst, RetPtr, IsVolatile);
+ };
+
+ if (CanUseSingleAtomicLoadInst && AllowInstruction) {
+ return EmitAtomicLoadInst(*ScopeConst, *MemorderConst);
+ }
+
+ if (CanUseAtomicLoadInst && AllowSwitch && AllowInstruction) {
+ auto createBasicBlock = [&](const Twine &BBName) {
+ return BasicBlock::Create(Ctx, Name + BBName, CurFn);
+ };
+
+ auto GenMemorderSwitch = [&](SyncScope::ID Scope) {
+ if (MemorderConst)
+ return EmitAtomicLoadInst(Scope, *MemorderConst);
+
+ // Create all the relevant BB's
+ BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
+ BasicBlock *AcquireBB = createBasicBlock(".acquire");
+ BasicBlock *ReleaseBB = createBasicBlock(".release");
+ BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
+ BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
+ BasicBlock *ContBB = createBasicBlock(".atomic.continue");
+
+ // Create the switch for the split
+ // MonotonicBB is arbitrarily chosen as the default case; in practice,
+ // this doesn't matter unless someone is crazy enough to use something
+ // that doesn't fold to a constant for the ordering.
+ Value *Order =
+ Builder.CreateIntCast(MemorderCABI, Builder.getInt32Ty(), false);
+ llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
+
+ Builder.SetInsertPoint(ContBB);
+
+ // Emit all the different atomics
+ Builder.SetInsertPoint(MonotonicBB);
+ EmitAtomicLoadInst(Scope, AtomicOrdering::Monotonic);
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(AcquireBB);
+ EmitAtomicLoadInst(Scope, AtomicOrdering::Acquire);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
+ Builder.GetInsertBlock());
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
+ Builder.GetInsertBlock());
+
+ Builder.SetInsertPoint(ReleaseBB);
+ EmitAtomicLoadInst(Scope, AtomicOrdering::Release);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
+ Builder.GetInsertBlock());
+
+ Builder.SetInsertPoint(AcqRelBB);
+ EmitAtomicLoadInst(Scope, AtomicOrdering::AcquireRelease);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
+ AcqRelBB);
+
+ Builder.SetInsertPoint(SeqCstBB);
+ EmitAtomicLoadInst(Scope, AtomicOrdering::SequentiallyConsistent);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
+ SeqCstBB);
+
+ Builder.SetInsertPoint(ContBB);
+ return;
+ };
+
+ auto GenScopeSwitch = [&]() {
+ if (ScopeConst)
+ return GenMemorderSwitch(*ScopeConst);
+
+ // Handle non-constant scope.
+ DenseMap<unsigned, BasicBlock *> BB;
+ for (const auto &S : SyncScopes) {
+ if (FallbackScope == S.second)
+ continue; // always the default case
+ BB[S.first] = createBasicBlock(Twine(".atomic.scope.") + S.second);
+ }
+
+ BasicBlock *DefaultBB = createBasicBlock(".atomic.scope.fallback");
+ BasicBlock *ContBB = createBasicBlock(".atomic.scope.continue");
+
+ Builder.SetInsertPoint(ContBB);
+
+ Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
+ /*IsSigned=*/false,
+ Name + ".atomic.scope.cast");
+ // If unsupported synch scope is encountered at run time, assume a
+ // fallback synch scope value.
+ SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
+ for (const auto &S : SyncScopes) {
+ BasicBlock *B = BB[S.first];
+ SI->addCase(Builder.getInt32(S.first), B);
+
+ Builder.SetInsertPoint(B);
+ SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
+ GenMemorderSwitch(SyncScopeID);
+ Builder.CreateBr(ContBB);
+ }
+
+ Builder.SetInsertPoint(DefaultBB);
+ SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
+ GenMemorderSwitch(SyncScopeID);
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(ContBB);
+ return;
+ };
+
+ return GenScopeSwitch();
+ }
+
+ if (CanUseSizedLibcall && AllowSizedLibcall) {
+ Value *LoadResult =
+ emitAtomicLoadN(PreferredSize, Ptr, MemorderCABI, Builder, DL, TLI);
+ LoadResult->setName(Name);
+ if (LoadResult) {
+ Builder.CreateStore(LoadResult, RetPtr);
+ return;
+ }
+
+ // emitAtomicLoadN can return nullptr if the backend does not
+ // support sized libcalls. Fall back to the non-sized libcall and remove the
+ // unused load again.
+ }
+
+ if (CanUseLibcall && AllowLibcall) {
+ // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile
+ // does not apply to function calls.
+
+ Value *DataSizeVal =
+ ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
+ Value *LoadCall = emitAtomicLoad(DataSizeVal, Ptr, RetPtr, MemorderCABI,
+ Builder, DL, TLI);
+ if (LoadCall) {
+ LoadCall->setName(Name);
+ return;
+ }
+ }
+
+ report_fatal_error(
+ "__atomic_load builtin not supported by any available means");
+}
+
+void llvm::emitAtomicStoreBuiltin(
+ Value *Ptr, Value *ValPtr,
+ // std::variant<Value *, bool> IsWeak,
+ bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
+ assert(Ptr->getType()->isPointerTy());
+ assert(ValPtr->getType()->isPointerTy());
+ assert(TLI);
+
+ LLVMContext &Ctx = Builder.getContext();
+ Function *CurFn = Builder.GetInsertBlock()->getParent();
+
+ unsigned MaxAtomicSizeSupported = 16;
+ if (TL)
+ MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
+
+ uint64_t DataSizeConst;
+ if (DataSize) {
+ DataSizeConst = *DataSize;
+ } else {
+ TypeSize DS = DL.getTypeStoreSize(DataTy);
+ DataSizeConst = DS.getFixedValue();
+ }
+ uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
+ assert(DataSizeConst <= AvailableSizeConst);
+
+#ifndef NDEBUG
+ if (DataTy) {
+ // 'long double' (80-bit extended precision) behaves strange here.
+ // DL.getTypeStoreSize says it is 10 bytes
+ // Clang says it is 12 bytes
+ // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that does
+ // not support floats, so AtomicExpandPass doesn't even know it originally
+ // was an FP80)
+ TypeSize DS = DL.getTypeStoreSize(DataTy);
+ assert(DS.getKnownMinValue() <= DataSizeConst &&
+ "Must access at least all the relevant bits of the data, possibly "
+ "some more for padding");
+ }
+#endif
+
+ Type *BoolTy = Builder.getInt1Ty();
+ Type *IntTy = getIntTy(Builder, TLI);
+
+ uint64_t PreferredSize = PowerOf2Ceil(DataSizeConst);
+ if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
+ PreferredSize = DataSizeConst;
+
+ llvm::Align EffectiveAlign;
+ if (Align) {
+ EffectiveAlign = *Align;
+ } else {
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // The alignment is only optional when parsing textual IR; for in-memory
+ // IR, it is always present. If unspecified, the alignment is assumed to
+ // be equal to the size of the ‘<value>’ type.
+ //
+ // We prefer safety here and assume no alignment, unless
+ // getPointerAlignment() can determine the actual alignment.
+ EffectiveAlign = Ptr->getPointerAlignment(DL);
+ }
+
+ // Only use the original data type if it is compatible with cmpxchg (and sized
+ // libcall function) and matches the preferred size. No type punning needed
+ // for __atomic_compare_exchange which only takes pointers.
+ Type *CoercedTy = nullptr;
+ if (DataTy && DataSizeConst == PreferredSize &&
+ (DataTy->isIntegerTy() || DataTy->isPointerTy()))
+ CoercedTy = DataTy;
+ else if (PreferredSize <= 16)
+ CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+
+ // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
+ // constant, determine the AtomicOrdering for use with the cmpxchg
+ // instruction. Also determines the llvm::Value to be passed to
+ // __atomic_compare_exchange in case cmpxchg is not legal.
+ auto processMemorder = [&](auto MemorderVariant)
+ -> std::pair<std::optional<AtomicOrdering>, Value *> {
+ if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
+ auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
+ return std::make_pair(
+ Memorder,
+ ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
+ }
+
+ if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
+ auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
+ return std::make_pair(
+ fromCABI(MemorderCABI),
+ ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
+ }
+
+ auto *MemorderCABI = std::get<Value *>(MemorderVariant);
+ if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
+ uint64_t MOInt = MO->getZExtValue();
+ return std::make_pair(fromCABI(MOInt), MO);
+ }
+
+ return std::make_pair(std::nullopt, MemorderCABI);
+ };
+
+ auto processScope = [&](auto ScopeVariant)
+ -> std::pair<std::optional<SyncScope::ID>, Value *> {
+ if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
+ auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
+ return std::make_pair(ScopeID, nullptr);
+ }
+
+ if (std::holds_alternative<StringRef>(ScopeVariant)) {
+ auto ScopeName = std::get<StringRef>(ScopeVariant);
+ SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
+ return std::make_pair(ScopeID, nullptr);
+ }
+
+ auto *IntVal = std::get<Value *>(ScopeVariant);
+ if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
+ uint64_t ScopeVal = InstConst->getZExtValue();
+ return std::make_pair(ScopeVal, IntVal);
+ }
+
+ return std::make_pair(std::nullopt, IntVal);
+ };
+
+ // auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
+ auto [MemorderConst, MemorderCABI] = processMemorder(Memorder);
+ auto [ScopeConst, ScopeVal] = processScope(Scope);
+
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
+ // a power of two greater than or equal to eight and less than or equal to a
+ // target-specific size limit.
+ bool CanUseAtomicLoadInst = PreferredSize <= MaxAtomicSizeSupported &&
+ llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
+ bool CanUseSingleAtomicLoadInst = CanUseAtomicLoadInst &&
+ MemorderConst.has_value() // && IsWeakConst
+ && ScopeConst;
+ bool CanUseSizedLibcall =
+ canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
+ ScopeConst == SyncScope::System;
+ bool CanUseLibcall = ScopeConst == SyncScope::System;
+
+ Value *ExpectedVal;
+ Value *DesiredVal;
+
+ LoadInst *Val;
+
+ // Emit load instruction, either as a single instruction, or as a case of a
+ // per-constant switch.
+ auto EmitAtomicStoreInst = [&](SyncScope::ID Scope, AtomicOrdering Memorder) {
+ StoreInst *AtomicInst = Builder.CreateStore(Val, Ptr, IsVolatile);
+ AtomicInst->setAtomic(Memorder, Scope);
+ AtomicInst->setAlignment(EffectiveAlign);
+ AtomicInst->setVolatile(IsVolatile);
+ };
+
+ if (CanUseSingleAtomicLoadInst && AllowInstruction) {
+ Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
+ return EmitAtomicStoreInst(*ScopeConst, *MemorderConst);
+ }
+
+ if (CanUseAtomicLoadInst && AllowSwitch && AllowInstruction) {
+ Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
+
+ auto createBasicBlock = [&](const Twine &BBName) {
+ return BasicBlock::Create(Ctx, Name + BBName, CurFn);
+ };
+
+ auto GenMemorderSwitch = [&](SyncScope::ID Scope) {
+ if (MemorderConst)
+ return EmitAtomicStoreInst(Scope, *MemorderConst);
+
+ // Create all the relevant BB's
+ BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
+ BasicBlock *AcquireBB = createBasicBlock(".acquire");
+ BasicBlock *ReleaseBB = createBasicBlock(".release");
+ BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
+ BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
+ BasicBlock *ContBB = createBasicBlock(".atomic.continue");
+
+ // Create the switch for the split
+ // MonotonicBB is arbitrarily chosen as the default case; in practice,
+ // this doesn't matter unless someone is crazy enough to use something
+ // that doesn't fold to a constant for the ordering.
+ Value *Order =
+ Builder.CreateIntCast(MemorderCABI, Builder.getInt32Ty(), false);
+ llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
+
+ Builder.SetInsertPoint(ContBB);
+
+ // Emit all the different atomics
+ Builder.SetInsertPoint(MonotonicBB);
+ EmitAtomicStoreInst(Scope, AtomicOrdering::Monotonic);
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(AcquireBB);
+ EmitAtomicStoreInst(Scope, AtomicOrdering::Acquire);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
+ Builder.GetInsertBlock());
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
+ Builder.GetInsertBlock());
+
+ Builder.SetInsertPoint(ReleaseBB);
+ EmitAtomicStoreInst(Scope, AtomicOrdering::Release);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
+ Builder.GetInsertBlock());
+
+ Builder.SetInsertPoint(AcqRelBB);
+ EmitAtomicStoreInst(Scope, AtomicOrdering::AcquireRelease);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
+ AcqRelBB);
+
+ Builder.SetInsertPoint(SeqCstBB);
+ EmitAtomicStoreInst(Scope, AtomicOrdering::SequentiallyConsistent);
+ Builder.CreateBr(ContBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
+ SeqCstBB);
+
+ Builder.SetInsertPoint(ContBB);
+ return;
+ };
+
+ auto GenScopeSwitch = [&]() {
+ if (ScopeConst)
+ return GenMemorderSwitch(*ScopeConst);
+
+ // Handle non-constant scope.
+ DenseMap<unsigned, BasicBlock *> BB;
+ for (const auto &S : SyncScopes) {
+ if (FallbackScope == S.second)
+ continue; // always the default case
+ BB[S.first] = createBasicBlock(Twine(".atomic.scope.") + S.second);
+ }
+
+ BasicBlock *DefaultBB = createBasicBlock(".atomic.scope.fallback");
+ BasicBlock *ContBB = createBasicBlock(".atomic.scope.continue");
+
+ Builder.SetInsertPoint(ContBB);
+
+ Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
+ /*IsSigned=*/false,
+ Name + ".atomic.scope.cast");
+ // If unsupported synch scope is encountered at run time, assume a
+ // fallback synch scope value.
+ SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
+ for (const auto &S : SyncScopes) {
+ BasicBlock *B = BB[S.first];
+ SI->addCase(Builder.getInt32(S.first), B);
+
+ Builder.SetInsertPoint(B);
+ SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
+ GenMemorderSwitch(SyncScopeID);
+ Builder.CreateBr(ContBB);
+ }
+
+ Builder.SetInsertPoint(DefaultBB);
+ SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
+ GenMemorderSwitch(SyncScopeID);
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(ContBB);
+ return;
+ };
+
+ return GenScopeSwitch();
+ }
+
+ if (CanUseSizedLibcall && AllowSizedLibcall) {
+ Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
+    if (Value *StoreCall = emitAtomicStoreN(DataSizeConst, Ptr, Val,
+                                            MemorderCABI, Builder, DL, TLI)) {
+      StoreCall->setName(Name);
+      return;
+    }
+
+    // emitAtomicStoreN can return nullptr if the backend does not
+    // support sized libcalls. Fall back to the non-sized libcall and remove the
+    // unused load again.
+  }
+
+ if (CanUseLibcall && AllowLibcall) {
+    // Fallback to a libcall function. From here on Scope and IsVolatile are
+    // ignored (IsWeak does not apply to atomic stores). Scope is assumed to
+    // be SyncScope::System (strongest possible assumption synchronizing with
+    // everything, instead of just a subset of sibling threads), and volatile
+    // does not apply to function calls.
+
+ Value *DataSizeVal =
+ ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
+ Value *StoreCall = emitAtomicStore(DataSizeVal, Ptr, ValPtr, MemorderCABI,
+ Builder, DL, TLI);
+ if (StoreCall)
+ return;
+ }
+
+ report_fatal_error(
+ "__atomic_store builtin not supported by any available means");
+}
+
Value *llvm::emitAtomicCompareExchangeBuiltin(
Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
Type *DataTy, std::optional<uint64_t> DataSize,
std::optional<uint64_t> AvailableSize, MaybeAlign Align,
IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
const TargetLowering *TL,
ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
- StringRef FallbackScope, bool AllowInstruction, bool AllowSwitch,
- bool AllowSizedLibcall) {
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
assert(Ptr->getType()->isPointerTy());
assert(ExpectedPtr->getType()->isPointerTy());
assert(DesiredPtr->getType()->isPointerTy());
@@ -130,18 +786,30 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
else if (PreferredSize <= 16)
CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+ std::optional<AtomicOrdering> SuccessMemorderConst;
// For resolving the SuccessMemorder/FailureMemorder arguments. If it is
// constant, determine the AtomicOrdering for use with the cmpxchg
// instruction. Also determines the llvm::Value to be passed to
// __atomic_compare_exchange in case cmpxchg is not legal.
auto processMemorder = [&](auto MemorderVariant)
-> std::pair<std::optional<AtomicOrdering>, Value *> {
+ if (holds_alternative_if_exists<std::monostate>(MemorderVariant)) {
+      // Derive FailureMemorder from SuccessMemorder
+ if (SuccessMemorderConst) {
+ AtomicOrdering MOFailure =
+ llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(
+ *SuccessMemorderConst);
+ MemorderVariant = MOFailure;
+ }
+ }
+
if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
return std::make_pair(
Memorder,
ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
}
+
if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
return std::make_pair(
@@ -196,8 +864,9 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
return std::make_pair(std::nullopt, IntVal);
};
+ Value *SuccessMemorderCABI;
auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
- auto [SuccessMemorderConst, SuccessMemorderCABI] =
+ std::tie(SuccessMemorderConst, SuccessMemorderCABI) =
processMemorder(SuccessMemorder);
auto [FailureMemorderConst, FailureMemorderCABI] =
processMemorder(FailureMemorder);
@@ -241,7 +910,9 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
FailureMemorderConst && IsWeakConst &&
ScopeConst;
bool CanUseSizedLibcall =
- canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL);
+ canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
+ ScopeConst == SyncScope::System;
+ bool CanUseLibcall = ScopeConst == SyncScope::System;
Value *ExpectedVal;
Value *DesiredVal;
@@ -254,19 +925,19 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
AtomicCmpXchgInst *AtomicInst =
Builder.CreateAtomicCmpXchg(Ptr, ExpectedVal, DesiredVal, Align,
SuccessMemorder, FailureMemorder, Scope);
- AtomicInst->setName("cmpxchg.pair");
+ AtomicInst->setName(Name + ".cmpxchg.pair");
AtomicInst->setAlignment(EffectiveAlign);
AtomicInst->setWeak(IsWeak);
AtomicInst->setVolatile(IsVolatile);
if (PrevPtr) {
- Value *PreviousVal =
- Builder.CreateExtractValue(AtomicInst, /*Idxs=*/0, "cmpxchg.prev");
+ Value *PreviousVal = Builder.CreateExtractValue(AtomicInst, /*Idxs=*/0,
+ Name + ".cmpxchg.prev");
Builder.CreateStore(PreviousVal, PrevPtr);
}
- Value *SuccessFailureVal =
- Builder.CreateExtractValue(AtomicInst, /*Idxs=*/1, "cmpxchg.success");
+ Value *SuccessFailureVal = Builder.CreateExtractValue(
+ AtomicInst, /*Idxs=*/1, Name + ".cmpxchg.success");
assert(SuccessFailureVal->getType()->isIntegerTy(1));
return SuccessFailureVal;
@@ -275,8 +946,9 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
if (CanUseSingleCmpxchngInst && AllowInstruction) {
// FIXME: Need to get alignment correct
ExpectedVal =
- Builder.CreateLoad(CoercedTy, ExpectedPtr, "cmpxchg.expected");
- DesiredVal = Builder.CreateLoad(CoercedTy, DesiredPtr, "cmpxchg.desired");
+ Builder.CreateLoad(CoercedTy, ExpectedPtr, Name + ".cmpxchg.expected");
+ DesiredVal =
+ Builder.CreateLoad(CoercedTy, DesiredPtr, Name + ".cmpxchg.desired");
return EmitCmpxchngInst(*IsWeakConst, *ScopeConst, *SuccessMemorderConst,
*FailureMemorderConst);
}
@@ -288,13 +960,14 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
// __atomic_compare_exchange function. In that case the switching was very
// unnecessary but cannot be undone.
if (CanUseCmpxchngInst && AllowSwitch && AllowInstruction) {
- auto createBasicBlock = [&](const Twine &Name) {
- return BasicBlock::Create(Ctx, Name, CurFn);
+ auto createBasicBlock = [&](const Twine &BBName) {
+ return BasicBlock::Create(Ctx, Name + BBName, CurFn);
};
ExpectedVal =
- Builder.CreateLoad(CoercedTy, ExpectedPtr, "cmpxchg.expected");
- DesiredVal = Builder.CreateLoad(CoercedTy, DesiredPtr, "cmpxchg.desired");
+ Builder.CreateLoad(CoercedTy, ExpectedPtr, Name + ".cmpxchg.expected");
+ DesiredVal =
+ Builder.CreateLoad(CoercedTy, DesiredPtr, Name + ".cmpxchg.desired");
auto GenFailureMemorderSwitch =
[&](bool IsWeak, SyncScope::ID Scope,
@@ -355,7 +1028,7 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
Builder.CreateBr(ContBB);
Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(BoolTy, 3, "cmpxcgh.success");
+ PHINode *Result = Builder.CreatePHI(BoolTy, 3, Name + ".cmpxchg.success");
Result->addIncoming(MonotonicResult, MonotonicSourceBB);
Result->addIncoming(AcquireResult, AcquireSourceBB);
Result->addIncoming(SeqCstResult, SeqCstSourceBB);
@@ -368,12 +1041,12 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
return GenFailureMemorderSwitch(IsWeak, Scope, *SuccessMemorderConst);
// Create all the relevant BB's
- BasicBlock *MonotonicBB = createBasicBlock("monotonic");
- BasicBlock *AcquireBB = createBasicBlock("acquire");
- BasicBlock *ReleaseBB = createBasicBlock("release");
- BasicBlock *AcqRelBB = createBasicBlock("acqrel");
- BasicBlock *SeqCstBB = createBasicBlock("seqcst");
- BasicBlock *ContBB = createBasicBlock("atomic.continue");
+ BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
+ BasicBlock *AcquireBB = createBasicBlock(".acquire");
+ BasicBlock *ReleaseBB = createBasicBlock(".release");
+ BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
+ BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
+ BasicBlock *ContBB = createBasicBlock(".atomic.continue");
// Create the switch for the split
// MonotonicBB is arbitrarily chosen as the default case; in practice,
@@ -384,7 +1057,7 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(BoolTy, 5, "cmpxcgh.success");
+ PHINode *Result = Builder.CreatePHI(BoolTy, 5, Name + ".cmpxchg.success");
// Emit all the different atomics
Builder.SetInsertPoint(MonotonicBB);
@@ -445,19 +1118,19 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
for (const auto &S : SyncScopes) {
if (FallbackScope == S.second)
continue; // always the default case
- BB[S.first] = createBasicBlock(Twine("cmpxchg.scope.") + S.second);
+ BB[S.first] = createBasicBlock(Twine(".cmpxchg.scope.") + S.second);
}
- BasicBlock *DefaultBB = createBasicBlock("atomic.scope.fallback");
- BasicBlock *ContBB = createBasicBlock("atomic.scope.continue");
+ BasicBlock *DefaultBB = createBasicBlock(".cmpxchg.scope.fallback");
+ BasicBlock *ContBB = createBasicBlock(".cmpxchg.scope.continue");
Builder.SetInsertPoint(ContBB);
- PHINode *Result =
- Builder.CreatePHI(BoolTy, SyncScopes.size() + 1, "cmpxchg.success");
+ PHINode *Result = Builder.CreatePHI(BoolTy, SyncScopes.size() + 1,
+ Name + ".cmpxchg.success");
Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
/*IsSigned*/ false,
- "atomic.cmpxchg.scope.cast");
+ Name + ".cmpxchg.scope.cast");
// If unsupported synch scope is encountered at run time, assume a
// fallback synch scope value.
SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
@@ -487,9 +1160,9 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
return GenScopeSwitch(*IsWeakConst);
// Create all the relevant BB's
- BasicBlock *StrongBB = createBasicBlock("cmpxchg.strong");
- BasicBlock *WeakBB = createBasicBlock("cmpxchg.weak");
- BasicBlock *ContBB = createBasicBlock("cmpxchg.continue");
+ BasicBlock *StrongBB = createBasicBlock(".cmpxchg.strong");
+ BasicBlock *WeakBB = createBasicBlock(".cmpxchg.weak");
+ BasicBlock *ContBB = createBasicBlock(".cmpxchg.continue");
// FIXME: Why is this a switch?
llvm::SwitchInst *SI = Builder.CreateSwitch(IsWeakVal, WeakBB);
@@ -506,7 +1179,8 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
Builder.CreateBr(ContBB);
Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(BoolTy, 2, "cmpxchg.isweak.success");
+ PHINode *Result =
+ Builder.CreatePHI(BoolTy, 2, Name + ".cmpxchg.isweak.success");
Result->addIncoming(WeakResult, WeakSourceBB);
Result->addIncoming(StrongResult, StrongSourceBB);
return Result;
@@ -515,31 +1189,17 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
return GenWeakSwitch();
}
- // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
- // ignored. IsWeak is assumed to be false, Scope is assumed to be
- // SyncScope::System (strongest possible assumption synchronizing with
- // everything, instead of just a subset of sibling threads), and volatile does
- // not apply to function calls.
-
- // FIXME: Some AMDGCN regression tests the addrspace, but
- // __atomic_compare_exchange by definition is addrsspace(0) and
- // emitAtomicCompareExchange will complain about it.
- if (Ptr->getType()->getPointerAddressSpace() ||
- ExpectedPtr->getType()->getPointerAddressSpace() ||
- DesiredPtr->getType()->getPointerAddressSpace())
- return Builder.getInt1(false);
-
if (CanUseSizedLibcall && AllowSizedLibcall) {
LoadInst *DesiredVal =
Builder.CreateLoad(IntegerType::get(Ctx, PreferredSize * 8), DesiredPtr,
- "cmpxchg.desired");
+ Name + ".cmpxchg.desired");
Value *SuccessResult = emitAtomicCompareExchangeN(
PreferredSize, Ptr, ExpectedPtr, DesiredVal, SuccessMemorderCABI,
FailureMemorderCABI, Builder, DL, TLI);
if (SuccessResult) {
Value *SuccessBool =
Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
- Builder.getInt8(0), "cmpxchg.success");
+ Builder.getInt8(0), Name + ".cmpxchg.success");
if (PrevPtr && PrevPtr != ExpectedPtr)
Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
@@ -552,42 +1212,61 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
DesiredVal->eraseFromParent();
}
- // FIXME: emitAtomicCompareExchange may fail if a function declaration with
- // the same name but different signature has already been emitted. Since the
- // function name starts with "__", i.e. is reserved for use by the compiler,
- // this should not happen.
- // It may also fail if the target's TargetLibraryInfo claims that
- // __atomic_compare_exchange is not supported. In either case there is no
- // fallback for atomics not supported by the target and we have to crash.
- Value *SuccessResult = emitAtomicCompareExchange(
- ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst), Ptr,
- ExpectedPtr, DesiredPtr, SuccessMemorderCABI, FailureMemorderCABI,
- Builder, DL, TLI);
- if (!SuccessResult)
- report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
-
- Value *SuccessBool =
- Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
- Builder.getInt8(0), "cmpxchg.success");
-
- if (PrevPtr && PrevPtr != ExpectedPtr)
- Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
- return SuccessBool;
+ if (CanUseLibcall && AllowLibcall) {
+ // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile
+ // does not apply to function calls.
+
+    // FIXME: Some AMDGCN regression tests use a non-default addrspace, but
+    // __atomic_compare_exchange by definition is addrspace(0) and
+    // emitAtomicCompareExchange will complain about it.
+ if (Ptr->getType()->getPointerAddressSpace() ||
+ ExpectedPtr->getType()->getPointerAddressSpace() ||
+ DesiredPtr->getType()->getPointerAddressSpace())
+ return Builder.getInt1(false);
+
+ // FIXME: emitAtomicCompareExchange may fail if a function declaration with
+ // the same name but different signature has already been emitted or the
+ // target does not support it. Since the function name starts with "__",
+ // i.e. is reserved for use by the compiler, this should not happen. It may
+ // also fail if the target's TargetLibraryInfo claims that
+ // __atomic_compare_exchange is not supported. In either case there is no
+ // fallback for atomics not supported by the target and we have to crash.
+ Value *SuccessResult = emitAtomicCompareExchange(
+ ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst), Ptr,
+ ExpectedPtr, DesiredPtr, SuccessMemorderCABI, FailureMemorderCABI,
+ Builder, DL, TLI);
+ if (SuccessResult) {
+ Value *SuccessBool =
+ Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
+ Builder.getInt8(0), Name + ".cmpxchg.success");
+
+ if (PrevPtr && PrevPtr != ExpectedPtr)
+ Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
+ return SuccessBool;
+ }
+ }
+
+ report_fatal_error(
+ "__atomic_compare_exchange builtin not supported by any available means");
}
Value *llvm::emitAtomicCompareExchangeBuiltin(
Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
- std::variant<Value *, bool> Weak, bool IsVolatile,
+ std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> FailureMemorder,
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder,
Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
std::optional<uint64_t> AvailableSize, MaybeAlign Align,
IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const TargetLowering *TL, bool AllowInstruction, bool AllowSwitch,
- bool AllowSizedLibcall) {
+ const TargetLowering *TL, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
return emitAtomicCompareExchangeBuiltin(
- Ptr, ExpectedPtr, DesiredPtr, Weak, IsVolatile, SuccessMemorder,
+ Ptr, ExpectedPtr, DesiredPtr, IsWeak, IsVolatile, SuccessMemorder,
FailureMemorder, SyncScope::System, PrevPtr, DataTy, DataSize,
- AvailableSize, Align, Builder, DL, TLI, TL, {}, StringRef(),
- AllowInstruction, AllowSwitch, AllowSizedLibcall);
+ AvailableSize, Align, Builder, DL, TLI, TL, {}, StringRef(), Name,
+ AllowInstruction, AllowSwitch, AllowSizedLibcall, AllowLibcall);
}
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 5d60c8aa9893e9..64c395e85b3a86 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1300,7 +1300,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 2);
Changed |= setWillReturn(F);
break;
+ case LibFunc_atomic_load:
+ case LibFunc_atomic_load_1:
+ case LibFunc_atomic_load_2:
+ case LibFunc_atomic_load_4:
+ case LibFunc_atomic_load_8:
+ case LibFunc_atomic_load_16:
case LibFunc_atomic_compare_exchange:
+ case LibFunc_atomic_compare_exchange_1:
+ case LibFunc_atomic_compare_exchange_2:
+ case LibFunc_atomic_compare_exchange_4:
+ case LibFunc_atomic_compare_exchange_8:
+ case LibFunc_atomic_compare_exchange_16:
Changed |= setArgsNoUndef(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotThrow(F);
@@ -1402,6 +1413,18 @@ FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
setArgExtAttr(*F, 2, TLI);
break;
+ case LibFunc_atomic_load:
+ setArgExtAttr(*F, 4, TLI); // Memorder
+ break;
+
+ case LibFunc_atomic_load_1:
+ case LibFunc_atomic_load_2:
+ case LibFunc_atomic_load_4:
+ case LibFunc_atomic_load_8:
+ case LibFunc_atomic_load_16:
+ setArgExtAttr(*F, 3, TLI); // Memorder
+ break;
+
case LibFunc_atomic_compare_exchange:
setRetExtAttr(*F, TLI); // return
setArgExtAttr(*F, 4, TLI); // SuccessMemorder
@@ -1774,6 +1797,105 @@ Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
{Dest, Fmt, VAList}, B, TLI);
}
+Value *llvm::emitAtomicLoad(Value *Size, Value *Ptr, Value *Ret,
+ Value *Memorder, IRBuilderBase &B,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Type *VoidTy = B.getVoidTy();
+ Type *BoolTy = B.getInt8Ty();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ Type *PtrTy = B.getPtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ return emitLibCall(LibFunc_atomic_load, VoidTy,
+ {SizeTTy, PtrTy, PtrTy, IntTy}, {Size, Ptr, Ret, Memorder},
+ B, TLI);
+}
+
+Value *llvm::emitAtomicLoadN(size_t Size, Value *Ptr, Value *Memorder,
+ IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ LibFunc TheLibFunc;
+ switch (Size) {
+ case 1:
+ TheLibFunc = LibFunc_atomic_load_1;
+ break;
+ case 2:
+ TheLibFunc = LibFunc_atomic_load_2;
+ break;
+ case 4:
+ TheLibFunc = LibFunc_atomic_load_4;
+ break;
+ case 8:
+ TheLibFunc = LibFunc_atomic_load_8;
+ break;
+ case 16:
+ TheLibFunc = LibFunc_atomic_load_16;
+ break;
+ default:
+ // emitLibCall below is also allowed to return nullptr, e.g. if
+ // TargetLibraryInfo says the backend does not support the libcall function.
+ return nullptr;
+ }
+
+ Type *VoidTy = B.getVoidTy();
+ Type *BoolTy = B.getInt8Ty();
+ Type *PtrTy = B.getPtrTy();
+ Type *ValTy = B.getIntNTy(Size * 8);
+ Type *IntTy = getIntTy(B, TLI);
+ return emitLibCall(TheLibFunc, ValTy, {PtrTy, IntTy}, {Ptr, Memorder}, B,
+ TLI);
+}
+
+Value *llvm::emitAtomicStore(Value *Size, Value *Ptr, Value *ValPtr,
+ Value *Memorder, IRBuilderBase &B,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Type *VoidTy = B.getVoidTy();
+ Type *BoolTy = B.getInt8Ty();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ Type *PtrTy = B.getPtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ return emitLibCall(LibFunc_atomic_store, VoidTy,
+ {SizeTTy, PtrTy, PtrTy, IntTy},
+ {Size, Ptr, ValPtr, Memorder}, B, TLI);
+}
+
+Value *llvm::emitAtomicStoreN(size_t Size, Value *Ptr, Value *Val,
+ Value *Memorder, IRBuilderBase &B,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ LibFunc TheLibFunc;
+ switch (Size) {
+ case 1:
+ TheLibFunc = LibFunc_atomic_store_1;
+ break;
+ case 2:
+ TheLibFunc = LibFunc_atomic_store_2;
+ break;
+ case 4:
+ TheLibFunc = LibFunc_atomic_store_4;
+ break;
+ case 8:
+ TheLibFunc = LibFunc_atomic_store_8;
+ break;
+ case 16:
+ TheLibFunc = LibFunc_atomic_store_16;
+ break;
+ default:
+ // emitLibCall below is also allowed to return nullptr, e.g. if
+ // TargetLibraryInfo says the backend does not support the libcall function.
+ return nullptr;
+ }
+
+ Type *VoidTy = B.getVoidTy();
+ Type *BoolTy = B.getInt8Ty();
+ Type *PtrTy = B.getPtrTy();
+ Type *ValTy = B.getIntNTy(Size * 8);
+ Type *IntTy = getIntTy(B, TLI);
+ return emitLibCall(TheLibFunc, VoidTy, {PtrTy, ValTy, IntTy},
+ {Ptr, Val, Memorder}, B, TLI);
+}
+
Value *llvm::emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
Value *Desired, Value *SuccessMemorder,
Value *FailureMemorder, IRBuilderBase &B,
@@ -1789,8 +1911,9 @@ Value *llvm::emitAtomicCompareExchange(Value *Size, Value *Ptr, Value *Expected,
{Size, Ptr, Expected, Desired, SuccessMemorder, FailureMemorder}, B, TLI);
}
-Value *llvm::emitAtomicCompareExchangeN(size_t Size, Value *Ptr, Value *Expected,
- Value *Desired, Value *SuccessMemorder,
+Value *llvm::emitAtomicCompareExchangeN(size_t Size, Value *Ptr,
+ Value *Expected, Value *Desired,
+ Value *SuccessMemorder,
Value *FailureMemorder,
IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 448d92e43a602c..cfbaa5c7822543 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,7 +34,7 @@
#
# CHECK: << Total TLI yes SDK no: 18
# CHECK: >> Total TLI no SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 277
+# CHECK: == Total TLI yes SDK yes: 271
#
# WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
# WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
@@ -48,16 +48,16 @@
# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
# WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}}
# WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 276
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 270
#
## The -COUNT suffix doesn't care if there are too many matches, so check
## the exact count first; the two directives should add up to that.
## Yes, this means additions to TLI will fail this test, but the argument
## to -COUNT can't be an expression.
-# AVAIL: TLI knows 528 symbols, 295 available
-# AVAIL-COUNT-295: {{^}} available
+# AVAIL: TLI knows 538 symbols, 289 available
+# AVAIL-COUNT-289: {{^}} available
# AVAIL-NOT: {{^}} available
-# UNAVAIL-COUNT-233: not available
+# UNAVAIL-COUNT-249: not available
# UNAVAIL-NOT: not available
## This is a large file so it's worth telling lit to stop here.
@@ -182,30 +182,6 @@ DynamicSymbols:
Type: STT_FUNC
Section: .text
Binding: STB_GLOBAL
- - Name: __atomic_compare_exchange
- Type: STT_FUNC
- Section: .text
- Binding: STB_GLOBAL
- - Name: __atomic_compare_exchange_1
- Type: STT_FUNC
- Section: .text
- Binding: STB_GLOBAL
- - Name: __atomic_compare_exchange_2
- Type: STT_FUNC
- Section: .text
- Binding: STB_GLOBAL
- - Name: __atomic_compare_exchange_4
- Type: STT_FUNC
- Section: .text
- Binding: STB_GLOBAL
- - Name: __atomic_compare_exchange_8
- Type: STT_FUNC
- Section: .text
- Binding: STB_GLOBAL
- - Name: __atomic_compare_exchange_16
- Type: STT_FUNC
- Section: .text
- Binding: STB_GLOBAL
- Name: abs
Type: STT_FUNC
Section: .text
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 19234c3e542fbf..d52ce10ea8fa3a 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -591,7 +591,18 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
"declare i8* @memrchr(i8*, i32, i64)\n"
"declare void @__atomic_load(i64, i8*, i8*, i32)\n"
+ "declare i8 @__atomic_load_1(ptr, i32)\n"
+ "declare i16 @__atomic_load_2(ptr, i32)\n"
+ "declare i32 @__atomic_load_4(ptr, i32)\n"
+ "declare i64 @__atomic_load_8(ptr, i32)\n"
+ "declare i128 @__atomic_load_16(ptr, i32)\n"
+
"declare void @__atomic_store(i64, i8*, i8*, i32)\n"
+ "declare void @__atomic_store_1(ptr, i8, i32)\n"
+ "declare void @__atomic_store_2(ptr, i16, i32)\n"
+ "declare void @__atomic_store_4(ptr, i32, i32)\n"
+ "declare void @__atomic_store_8(ptr, i64, i32)\n"
+ "declare void @__atomic_store_16(ptr, i128, i32)\n"
"declare i8 @__atomic_compare_exchange(i64, ptr, ptr, ptr, i32, i32)\n"
"declare i8 @__atomic_compare_exchange_1(ptr, ptr, i8, i32, i32)\n"
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 630cd03c688012..dae14601f6667b 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -3846,11 +3846,14 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
Type *Float32 = Type::getFloatTy(Ctx);
AllocaInst *XVal = Builder.CreateAlloca(Float32);
XVal->setName("AtomicVar");
+ OpenMPIRBuilder::InsertPointTy AllocaIP(XVal->getParent(),
+ XVal->getIterator());
OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
- Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
+ Builder.restoreIP(
+ OMPBuilder.createAtomicWrite(Loc, AllocaIP, X, ValToWrite, AO));
IntegerType *IntCastTy =
IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
@@ -3879,13 +3882,16 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
IntegerType *Int32 = Type::getInt32Ty(Ctx);
AllocaInst *XVal = Builder.CreateAlloca(Int32);
XVal->setName("AtomicVar");
+ OpenMPIRBuilder::InsertPointTy AllocaIP(XVal->getParent(),
+ XVal->getIterator());
OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
BasicBlock *EntryBB = BB;
- Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
+ Builder.restoreIP(
+ OMPBuilder.createAtomicWrite(Loc, AllocaIP, X, ValToWrite, AO));
StoreInst *StoreofAtomic = nullptr;
>From 600a81374f9dae752e063e51accb1b3cd8b7d657 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Mon, 11 Nov 2024 15:32:38 +0100
Subject: [PATCH 06/17] Refactoring
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 14 +-
.../llvm/Transforms/Utils/BuildBuiltins.h | 28 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 149 +-
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 1501 +++++++----------
.../Frontend/OpenMPIRBuilderTest.cpp | 38 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 13 +-
6 files changed, 727 insertions(+), 1016 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index ce6dd56f52a631..c9c8753f84b313 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3085,6 +3085,7 @@ class OpenMPIRBuilder {
///
/// \returns A pair of the old value of X before the update, and the value
/// used for the update.
+ /// FIXME: "Value used for the update"? Should be "the updated value"?
Expected<std::pair<Value *, Value *>>
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
@@ -3125,9 +3126,9 @@ class OpenMPIRBuilder {
/// instructions.
///
/// \return Insertion point after generated atomic read IR.
- InsertPointTy createAtomicRead(const LocationDescription &Loc,
- AtomicOpValue &X, AtomicOpValue &V,
- AtomicOrdering AO);
+ InsertPointOrErrorTy createAtomicRead(const LocationDescription &Loc,
+ AtomicOpValue &X, AtomicOpValue &V,
+ AtomicOrdering AO);
/// Emit atomic write for : X = Expr --- Only Scalar data types.
///
@@ -3138,9 +3139,10 @@ class OpenMPIRBuilder {
/// instructions.
///
/// \return Insertion point after generated atomic Write IR.
- InsertPointTy createAtomicWrite(const LocationDescription &Loc,
- InsertPointTy AllocaIP, AtomicOpValue &X,
- Value *Expr, AtomicOrdering AO);
+ InsertPointOrErrorTy createAtomicWrite(const LocationDescription &Loc,
+ InsertPointTy AllocaIP,
+ AtomicOpValue &X, Value *Expr,
+ AtomicOrdering AO);
/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index f9590868792981..95977752ac8976 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Error.h"
#include <cstdint>
#include <variant>
@@ -32,7 +33,7 @@ namespace SyncScope {
typedef uint8_t ID;
}
-void emitAtomicLoadBuiltin(
+Error emitAtomicLoadBuiltin(
Value *Ptr, Value *RetPtr,
// std::variant<Value *, bool> IsWeak,
bool IsVolatile,
@@ -46,7 +47,7 @@ void emitAtomicLoadBuiltin(
bool AllowInstruction = true, bool AllowSwitch = true,
bool AllowSizedLibcall = true, bool AllowLibcall = true);
-void emitAtomicStoreBuiltin(
+Error emitAtomicStoreBuiltin(
Value *Ptr, Value *ValPtr,
// std::variant<Value *, bool> IsWeak,
bool IsVolatile,
@@ -73,11 +74,11 @@ void emitAtomicStoreBuiltin(
/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
///
/// @param Ptr The memory location accessed atomically.
-/// @Param ExpectedPtr Pointer to the data expected at /p Ptr. The exchange will
-/// only happen if the value at \p Ptr is equal to this. Data
-/// at \p ExpectedPtr may or may not be be overwritten, so do
-/// not use after this call.
-/// @Param DesiredPtr Pointer to the data that the data at /p Ptr is replaced
+/// @param ExpectedPtr Pointer to the data expected at \p Ptr. The exchange will
+/// only happen if the value at \p Ptr is equal to this
+/// (unless IsWeak is set). Data at \p ExpectedPtr may or may
+///                    not be overwritten, so do not use after this call.
+/// @param DesiredPtr Pointer to the data that the data at \p Ptr is replaced
/// with.
/// @param IsWeak If true, the exchange may not happen even if the data at
/// \p Ptr equals to \p ExpectedPtr.
@@ -97,8 +98,13 @@ void emitAtomicStoreBuiltin(
/// supports integer and pointers only. If any other type or
/// omitted, type-prunes to an integer the holds at least \p
/// DataSize bytes.
-/// @param PrevPtr (optional) The value that /p Ptr had before the exchange
-/// is stored here.
+/// @param PrevPtr (optional) Receives the value at \p Ptr before the atomic
+/// exchange is attempted. This means:
+/// In case of success: The value at \p Ptr before the
+/// update. That is, the value passed behind \p ExpectedPtr.
+/// In case of failure: The current value at \p Ptr, i.e. the
+/// atomic exchange effectively only performs an atomic
+/// load of that value.
/// @param DataSize Number of bytes to be exchanged.
/// @param AvailableSize The total size that can be used for the atomic
/// operation. It may include trailing padding in addition to
@@ -128,7 +134,7 @@ void emitAtomicStoreBuiltin(
///
/// @return A boolean value that indicates whether the exchange has happened
/// (true) or not (false).
-Value *emitAtomicCompareExchangeBuiltin(
+Expected<Value *> emitAtomicCompareExchangeBuiltin(
Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
@@ -144,7 +150,7 @@ Value *emitAtomicCompareExchangeBuiltin(
bool AllowInstruction = true, bool AllowSwitch = true,
bool AllowSizedLibcall = true, bool AllowLibcall = true);
-Value *emitAtomicCompareExchangeBuiltin(
+Expected<Value *> emitAtomicCompareExchangeBuiltin(
Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 17cbed65c04c3f..5b9264cfd189be 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8028,7 +8028,7 @@ bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
return Flush;
}
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
AtomicOpValue &X, AtomicOpValue &V,
AtomicOrdering AO) {
@@ -8047,29 +8047,33 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Twine Name(X.Var->getName());
- emitAtomicLoadBuiltin(X.Var,
- /*RetPtr=*/V.Var,
- /*IsVolatile=*/X.IsVolatile || V.IsVolatile,
- /*Memorder=*/AO,
- /*SyncScope=*/SyncScope::System,
- /*DataTy=*/XElemTy,
- /*DataSize=*/{},
- /*AvailableSize=*/{},
- /*Align=*/{},
- /*Builder=*/Builder,
- /*DL=*/DL,
- /*TLI=*/&TLI,
- /*TL=*/nullptr,
- /*SyncScopes=*/{},
- /*FallbackScope=*/StringRef(),
- /*Name=*/Name + ".atomic.read");
+ Error ALResult =
+ emitAtomicLoadBuiltin(X.Var,
+ /*RetPtr=*/V.Var,
+ /*IsVolatile=*/X.IsVolatile || V.IsVolatile,
+ /*Memorder=*/AO,
+ /*SyncScope=*/SyncScope::System,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/StringRef(),
+ /*Name=*/Name + ".atomic.read");
+ if (ALResult)
+ return std::move(ALResult);
+
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
// LoadInst *LoadedVal= Builder.CreateLoad(XElemTy, X.Var, Name );
return Builder.saveIP();
}
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
InsertPointTy AllocaIP, AtomicOpValue &X,
Value *Expr, AtomicOrdering AO) {
@@ -8096,22 +8100,24 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
Builder.restoreIP(ContIP);
Builder.CreateStore(Expr, ValPtr);
- emitAtomicStoreBuiltin(X.Var,
- /*ValPtr=*/ValPtr,
- /*IsVolatile=*/X.IsVolatile,
- /*Memorder=*/AO,
- /*SyncScope=*/SyncScope::System,
- /*DataTy=*/XElemTy,
- /*DataSize=*/{},
- /*AvailableSize=*/{},
- /*Align=*/{},
- /*Builder=*/Builder,
- /*DL=*/DL,
- /*TLI=*/&TLI,
- /*TL=*/nullptr,
- /*SyncScopes=*/{},
- /*FallbackScope=*/StringRef(),
- /*Name=*/Name + ".atomic.write");
+ Error ASResult = emitAtomicStoreBuiltin(X.Var,
+ /*ValPtr=*/ValPtr,
+ /*IsVolatile=*/X.IsVolatile,
+ /*Memorder=*/AO,
+ /*SyncScope=*/SyncScope::System,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/StringRef(),
+ /*Name=*/Name + ".atomic.write");
+ if (ASResult)
+ return ASResult;
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
return Builder.saveIP();
@@ -8225,44 +8231,47 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Twine Name(X->getName());
- // Create new CFG.
- BasicBlock *ContBB = splitBB(Builder, true, X->getName() + ".atomic.cont");
- BasicBlock *ExitBB = splitBB(Builder, false, X->getName() + ".atomic.exit");
- auto ContIP = Builder.saveIP();
-
- // Reserve some stack space
+ // Reserve some stack space.
+ InsertPointTy InitIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
AllocaInst *OrigPtr =
Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.orig.ptr");
AllocaInst *UpdPtr =
Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.upd.ptr");
- AllocaInst *PrevPtr =
- Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.upd.prev");
+ Builder.restoreIP(InitIP);
+
+ // Old value for first transaction. Every followup-transaction will use the
+ // prev value from cmpxchg.
+ Error ALResult = emitAtomicLoadBuiltin(X,
+ /*RetPtr=*/OrigPtr,
+ /*IsVolatile=*/false,
+ /*Memorder=*/AO,
+ /*SyncScope=*/SyncScope::System,
+ /*DataTy=*/XElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/StringRef(),
+ /*Name=*/Name);
+ if (ALResult)
+ return std::move(ALResult);
+
+ // Create new CFG.
+ BasicBlock *ContBB = splitBB(Builder, true, X->getName() + ".atomic.cont");
+ BasicBlock *ExitBB = splitBB(Builder, false, X->getName() + ".atomic.exit");
+ InsertPointTy ContIP = Builder.saveIP();
- // Emit the update transaction.
+ // Emit the update transaction...
Builder.SetInsertPoint(ContBB);
- // 1. Get original value.
- emitAtomicLoadBuiltin(X,
- /*RetPtr=*/OrigPtr,
- /*IsVolatile=*/false,
- /*Memorder=*/AO,
- /*SyncScope=*/SyncScope::System,
- /*DataTy=*/XElemTy,
- /*DataSize=*/{},
- /*AvailableSize=*/{},
- /*Align=*/{},
- /*Builder=*/Builder,
- /*DL=*/DL,
- /*TLI=*/&TLI,
- /*TL=*/nullptr,
- /*SyncScopes=*/{},
- /*FallbackScope=*/StringRef(),
- /*Name=*/Name);
-
- // 2. Let the user code compute the new value.
+ // 1. Let the user code compute the new value.
// FIXME: This should not be done by-value, as the type might be unreasonable
- // large (e.g. i4096) and LLVM does not scale will with such large types.
+ // large (e.g. i4096) and LLVM does not scale well with such large types.
Value *OrigVal = Builder.CreateLoad(XElemTy, OrigPtr, Name + ".atomic.orig");
Expected<Value *> CBResult = UpdateOp(OrigVal, Builder);
if (!CBResult)
@@ -8270,8 +8279,8 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
Value *UpdVal = *CBResult;
Builder.CreateStore(UpdVal, UpdPtr);
- // 3. AtomicCompareExchange to replace OrigVal with UpdVal.
- Value *Success = emitAtomicCompareExchangeBuiltin(
+ // 2. AtomicCompareExchange to replace OrigVal with UpdVal.
+ Expected<Value *> ACEResult = emitAtomicCompareExchangeBuiltin(
/*Ptr=*/X,
/*ExpectedPtr=*/OrigPtr,
/*DesiredPtr=*/UpdPtr,
@@ -8279,7 +8288,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
/*IsVolatile=*/false,
/*SuccessMemorder=*/AO,
/*FailureMemorder=*/{},
- /*PrevPtr=*/PrevPtr,
+ /*PrevPtr=*/OrigPtr,
/*DataTy=*/XElemTy,
/*DataSize=*/{},
/*AvailableSize=*/{},
@@ -8289,15 +8298,17 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
/*TLI=*/&TLI,
/*TL=*/nullptr,
/*Name=*/Name);
+ if (!ACEResult)
+ return ACEResult.takeError();
+ Value *Success = *ACEResult;
- // 4. Repeat transaction until successful.
+ // 3. Repeat transaction until successful.
Builder.CreateCondBr(Success, ExitBB, ContBB);
// Continue when the update transaction was successful.
Builder.restoreIP(ContIP);
- Value *PrevVal = Builder.CreateLoad(XElemTy, PrevPtr, Name + ".atomic.prev");
- return std::make_pair(OrigVal, PrevVal);
+ return std::make_pair(OrigVal, UpdVal);
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 43f21bdec38ff7..ce4ba8044b929f 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -68,676 +68,333 @@ constexpr bool holds_alternative_if_exists(const Variant &v) {
}
}
-} // namespace
-
-void llvm::emitAtomicLoadBuiltin(
- Value *Ptr, Value *RetPtr,
- // std::variant<Value *, bool> IsWeak,
- bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
- std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
- std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
- MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
- const TargetLibraryInfo *TLI, const TargetLowering *TL,
- ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
- StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
- bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
- assert(Ptr->getType()->isPointerTy());
- assert(RetPtr->getType()->isPointerTy());
- assert(TLI);
-
- LLVMContext &Ctx = Builder.getContext();
- Function *CurFn = Builder.GetInsertBlock()->getParent();
-
- unsigned MaxAtomicSizeSupported = 16;
- if (TL)
- MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
-
- uint64_t DataSizeConst;
- if (DataSize) {
- DataSizeConst = *DataSize;
- } else {
- TypeSize DS = DL.getTypeStoreSize(DataTy);
- DataSizeConst = DS.getFixedValue();
- }
- uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
- assert(DataSizeConst <= AvailableSizeConst);
-
-#ifndef NDEBUG
- if (DataTy) {
- // 'long double' (80-bit extended precision) behaves strange here.
- // DL.getTypeStoreSize says it is 10 bytes
- // Clang says it is 12 bytes
- // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that does
- // not support floats, so AtomicExpandPass doesn't even know it originally
- // was an FP80)
- TypeSize DS = DL.getTypeStoreSize(DataTy);
- assert(DS.getKnownMinValue() <= DataSizeConst &&
- "Must access at least all the relevant bits of the data, possibly "
- "some more for padding");
- }
-#endif
-
- Type *BoolTy = Builder.getInt1Ty();
- Type *IntTy = getIntTy(Builder, TLI);
-
- uint64_t PreferredSize = PowerOf2Ceil(DataSizeConst);
- if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
- PreferredSize = DataSizeConst;
+class AtomicEmitter {
+public:
+ AtomicEmitter(
+ Value *Ptr,
+ // Value *ExpectedPtr,
+ // Value *DesiredPtr,
+ std::variant<Value *, bool> IsWeak, bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope,
+ // Value *PrevPtr,
+ Type *DataTy, std::optional<uint64_t> DataSize,
+ std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+ IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall)
+ : Ctx(Builder.getContext()), CurFn(Builder.GetInsertBlock()->getParent()),
+ Ptr(Ptr), IsWeak(IsWeak), IsVolatile(IsVolatile),
+ SuccessMemorder(SuccessMemorder), FailureMemorder(FailureMemorder),
+ Scope(Scope), DataTy(DataTy), DataSize(DataSize),
+ AvailableSize(AvailableSize), Align(Align), Builder(Builder), DL(DL),
+ TLI(TLI), TL(TL), SyncScopes(SyncScopes), FallbackScope(FallbackScope),
+ Name(Name), AllowInstruction(AllowInstruction),
+ AllowSwitch(AllowSwitch), AllowSizedLibcall(AllowSizedLibcall),
+ AllowLibcall(AllowLibcall) {}
+
+protected:
+ LLVMContext &Ctx;
+ Function *CurFn;
+
+ Value *Ptr;
+ std::variant<Value *, bool> IsWeak;
+ bool IsVolatile;
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder;
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder;
+ std::variant<Value *, SyncScope::ID, StringRef> Scope;
+ Type *DataTy;
+ std::optional<uint64_t> DataSize;
+ std::optional<uint64_t> AvailableSize;
+ MaybeAlign Align;
+ IRBuilderBase &Builder;
+ const DataLayout &DL;
+ const TargetLibraryInfo *TLI;
+ const TargetLowering *TL;
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes;
+ StringRef FallbackScope;
+ llvm::Twine Name;
+ bool AllowInstruction;
+ bool AllowSwitch;
+ bool AllowSizedLibcall;
+ bool AllowLibcall;
- llvm::Align EffectiveAlign;
- if (Align) {
- EffectiveAlign = *Align;
- } else {
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // The alignment is only optional when parsing textual IR; for in-memory
- // IR, it is always present. If unspecified, the alignment is assumed to
- // be equal to the size of the ‘<value>’ type.
- //
- // We prefer safety here and assume no alignment, unless
- // getPointerAlignment() can determine the actual alignment.
- EffectiveAlign = Ptr->getPointerAlignment(DL);
- }
-
- // Only use the original data type if it is compatible with cmpxchg (and sized
- // libcall function) and matches the preferred size. No type punning needed
- // for __atomic_compare_exchange which only takes pointers.
Type *CoercedTy = nullptr;
- if (DataTy && DataSizeConst == PreferredSize &&
- (DataTy->isIntegerTy() || DataTy->isPointerTy()))
- CoercedTy = DataTy;
- else if (PreferredSize <= 16)
- CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
-
- // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
- // constant, determine the AtomicOrdering for use with the cmpxchg
- // instruction. Also determines the llvm::Value to be passed to
- // __atomic_compare_exchange in case cmpxchg is not legal.
- auto processMemorder = [&](auto MemorderVariant)
- -> std::pair<std::optional<AtomicOrdering>, Value *> {
- if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
- auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
- return std::make_pair(
- Memorder,
- ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
- }
-
- if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
- auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
- return std::make_pair(
- fromCABI(MemorderCABI),
- ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
- }
-
- auto *MemorderCABI = std::get<Value *>(MemorderVariant);
- if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
- uint64_t MOInt = MO->getZExtValue();
- return std::make_pair(fromCABI(MOInt), MO);
- }
-
- return std::make_pair(std::nullopt, MemorderCABI);
- };
-
- auto processScope = [&](auto ScopeVariant)
- -> std::pair<std::optional<SyncScope::ID>, Value *> {
- if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
- auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
- return std::make_pair(ScopeID, nullptr);
- }
-
- if (std::holds_alternative<StringRef>(ScopeVariant)) {
- auto ScopeName = std::get<StringRef>(ScopeVariant);
- SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
- return std::make_pair(ScopeID, nullptr);
- }
-
- auto *IntVal = std::get<Value *>(ScopeVariant);
- if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
- uint64_t ScopeVal = InstConst->getZExtValue();
- return std::make_pair(ScopeVal, IntVal);
- }
-
- return std::make_pair(std::nullopt, IntVal);
- };
-
- // auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
- auto [MemorderConst, MemorderCABI] = processMemorder(Memorder);
- auto [ScopeConst, ScopeVal] = processScope(Scope);
-
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
- // a power of two greater than or equal to eight and less than or equal to a
- // target-specific size limit.
- bool CanUseAtomicLoadInst = PreferredSize <= MaxAtomicSizeSupported &&
- llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
- bool CanUseSingleAtomicLoadInst = CanUseAtomicLoadInst &&
- MemorderConst.has_value() // && IsWeakConst
- && ScopeConst;
- bool CanUseSizedLibcall =
- canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
- ScopeConst == SyncScope::System;
- bool CanUseLibcall = ScopeConst == SyncScope::System;
-
- Value *ExpectedVal;
- Value *DesiredVal;
-
- // Emit load instruction, either as a single instruction, or as a case of a
- // per-constant switch.
- auto EmitAtomicLoadInst = [&](SyncScope::ID Scope, AtomicOrdering Memorder) {
- LoadInst *AtomicInst =
- Builder.CreateLoad(CoercedTy, Ptr, IsVolatile, Name + ".atomic.load");
- AtomicInst->setAtomic(Memorder, Scope);
- AtomicInst->setAlignment(EffectiveAlign);
- AtomicInst->setVolatile(IsVolatile);
-
- // Store loaded result to where the caller expects it.
- // FIXME: Do we need to zero the padding, if any?
- Builder.CreateStore(AtomicInst, RetPtr, IsVolatile);
- };
-
- if (CanUseSingleAtomicLoadInst && AllowInstruction) {
- return EmitAtomicLoadInst(*ScopeConst, *MemorderConst);
- }
-
- if (CanUseAtomicLoadInst && AllowSwitch && AllowInstruction) {
- auto createBasicBlock = [&](const Twine &BBName) {
- return BasicBlock::Create(Ctx, Name + BBName, CurFn);
- };
-
- auto GenMemorderSwitch = [&](SyncScope::ID Scope) {
- if (MemorderConst)
- return EmitAtomicLoadInst(Scope, *MemorderConst);
-
- // Create all the relevant BB's
- BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
- BasicBlock *AcquireBB = createBasicBlock(".acquire");
- BasicBlock *ReleaseBB = createBasicBlock(".release");
- BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
- BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
- BasicBlock *ContBB = createBasicBlock(".atomic.continue");
-
- // Create the switch for the split
- // MonotonicBB is arbitrarily chosen as the default case; in practice,
- // this doesn't matter unless someone is crazy enough to use something
- // that doesn't fold to a constant for the ordering.
- Value *Order =
- Builder.CreateIntCast(MemorderCABI, Builder.getInt32Ty(), false);
- llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
-
- Builder.SetInsertPoint(ContBB);
-
- // Emit all the different atomics
- Builder.SetInsertPoint(MonotonicBB);
- EmitAtomicLoadInst(Scope, AtomicOrdering::Monotonic);
- Builder.CreateBr(ContBB);
-
- Builder.SetInsertPoint(AcquireBB);
- EmitAtomicLoadInst(Scope, AtomicOrdering::Acquire);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
- Builder.GetInsertBlock());
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
- Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(ReleaseBB);
- EmitAtomicLoadInst(Scope, AtomicOrdering::Release);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
- Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(AcqRelBB);
- EmitAtomicLoadInst(Scope, AtomicOrdering::AcquireRelease);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
- AcqRelBB);
-
- Builder.SetInsertPoint(SeqCstBB);
- EmitAtomicLoadInst(Scope, AtomicOrdering::SequentiallyConsistent);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
- SeqCstBB);
-
- Builder.SetInsertPoint(ContBB);
- return;
- };
-
- auto GenScopeSwitch = [&]() {
- if (ScopeConst)
- return GenMemorderSwitch(*ScopeConst);
-
- // Handle non-constant scope.
- DenseMap<unsigned, BasicBlock *> BB;
- for (const auto &S : SyncScopes) {
- if (FallbackScope == S.second)
- continue; // always the default case
- BB[S.first] = createBasicBlock(Twine(".atomic.scope.") + S.second);
- }
-
- BasicBlock *DefaultBB = createBasicBlock(".atomic.scope.fallback");
- BasicBlock *ContBB = createBasicBlock(".atomic.scope.continue");
-
- Builder.SetInsertPoint(ContBB);
-
- Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
- /*IsSigned=*/false,
- Name + ".atomic.scope.cast");
- // If unsupported synch scope is encountered at run time, assume a
- // fallback synch scope value.
- SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
- for (const auto &S : SyncScopes) {
- BasicBlock *B = BB[S.first];
- SI->addCase(Builder.getInt32(S.first), B);
-
- Builder.SetInsertPoint(B);
- SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
- GenMemorderSwitch(SyncScopeID);
- Builder.CreateBr(ContBB);
- }
-
- Builder.SetInsertPoint(DefaultBB);
- SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
- GenMemorderSwitch(SyncScopeID);
- Builder.CreateBr(ContBB);
-
- Builder.SetInsertPoint(ContBB);
- return;
- };
-
- return GenScopeSwitch();
- }
-
- if (CanUseSizedLibcall && AllowSizedLibcall) {
- Value *LoadResult =
- emitAtomicLoadN(PreferredSize, Ptr, MemorderCABI, Builder, DL, TLI);
- LoadResult->setName(Name);
- if (LoadResult) {
- Builder.CreateStore(LoadResult, RetPtr);
- return;
- }
-
- // emitAtomicLoadN can return nullptr if the backend does not
- // support sized libcalls. Fall back to the non-sized libcall and remove the
- // unused load again.
- }
-
- if (CanUseLibcall && AllowLibcall) {
- // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
- // ignored. IsWeak is assumed to be false, Scope is assumed to be
- // SyncScope::System (strongest possible assumption synchronizing with
- // everything, instead of just a subset of sibling threads), and volatile
- // does not apply to function calls.
-
- Value *DataSizeVal =
- ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
- Value *LoadCall = emitAtomicLoad(DataSizeVal, Ptr, RetPtr, MemorderCABI,
- Builder, DL, TLI);
- if (LoadCall) {
- LoadCall->setName(Name);
- return;
- }
- }
-
- report_fatal_error(
- "__atomic_load builtin not supported by any available means");
-}
-
-void llvm::emitAtomicStoreBuiltin(
- Value *Ptr, Value *ValPtr,
- // std::variant<Value *, bool> IsWeak,
- bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
- std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
- std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
- MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
- const TargetLibraryInfo *TLI, const TargetLowering *TL,
- ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
- StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
- bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
- assert(Ptr->getType()->isPointerTy());
- assert(ValPtr->getType()->isPointerTy());
- assert(TLI);
-
- LLVMContext &Ctx = Builder.getContext();
- Function *CurFn = Builder.GetInsertBlock()->getParent();
-
- unsigned MaxAtomicSizeSupported = 16;
- if (TL)
- MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
-
uint64_t DataSizeConst;
- if (DataSize) {
- DataSizeConst = *DataSize;
- } else {
- TypeSize DS = DL.getTypeStoreSize(DataTy);
- DataSizeConst = DS.getFixedValue();
- }
- uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
- assert(DataSizeConst <= AvailableSizeConst);
-
-#ifndef NDEBUG
- if (DataTy) {
- // 'long double' (80-bit extended precision) behaves strange here.
- // DL.getTypeStoreSize says it is 10 bytes
- // Clang says it is 12 bytes
- // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that does
- // not support floats, so AtomicExpandPass doesn't even know it originally
- // was an FP80)
- TypeSize DS = DL.getTypeStoreSize(DataTy);
- assert(DS.getKnownMinValue() <= DataSizeConst &&
- "Must access at least all the relevant bits of the data, possibly "
- "some more for padding");
- }
-#endif
-
- Type *BoolTy = Builder.getInt1Ty();
- Type *IntTy = getIntTy(Builder, TLI);
-
- uint64_t PreferredSize = PowerOf2Ceil(DataSizeConst);
- if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
- PreferredSize = DataSizeConst;
-
llvm::Align EffectiveAlign;
- if (Align) {
- EffectiveAlign = *Align;
- } else {
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // The alignment is only optional when parsing textual IR; for in-memory
- // IR, it is always present. If unspecified, the alignment is assumed to
- // be equal to the size of the ‘<value>’ type.
- //
- // We prefer safety here and assume no alignment, unless
- // getPointerAlignment() can determine the actual alignment.
- EffectiveAlign = Ptr->getPointerAlignment(DL);
- }
-
- // Only use the original data type if it is compatible with cmpxchg (and sized
- // libcall function) and matches the preferred size. No type punning needed
- // for __atomic_compare_exchange which only takes pointers.
- Type *CoercedTy = nullptr;
- if (DataTy && DataSizeConst == PreferredSize &&
- (DataTy->isIntegerTy() || DataTy->isPointerTy()))
- CoercedTy = DataTy;
- else if (PreferredSize <= 16)
- CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
-
- // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
- // constant, determine the AtomicOrdering for use with the cmpxchg
- // instruction. Also determines the llvm::Value to be passed to
- // __atomic_compare_exchange in case cmpxchg is not legal.
- auto processMemorder = [&](auto MemorderVariant)
- -> std::pair<std::optional<AtomicOrdering>, Value *> {
- if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
- auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
- return std::make_pair(
- Memorder,
- ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
- }
-
- if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
- auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
- return std::make_pair(
- fromCABI(MemorderCABI),
- ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
- }
-
- auto *MemorderCABI = std::get<Value *>(MemorderVariant);
- if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
- uint64_t MOInt = MO->getZExtValue();
- return std::make_pair(fromCABI(MOInt), MO);
- }
-
- return std::make_pair(std::nullopt, MemorderCABI);
+ uint64_t PreferredSize;
+ std::optional<AtomicOrdering> SuccessMemorderConst;
+ Value *SuccessMemorderCABI;
+ std::optional<AtomicOrdering> FailureMemorderConst;
+ Value *FailureMemorderCABI;
+ std::optional<SyncScope::ID> ScopeConst;
+ Value *ScopeVal;
+ std::optional<bool> IsWeakConst;
+ Value *IsWeakVal;
+ // Value *ExpectedVal;
+ // Value *DesiredVal;
+
+ BasicBlock *createBasicBlock(const Twine &BBName) {
+ return BasicBlock::Create(Ctx, Name + BBName, CurFn);
};
- auto processScope = [&](auto ScopeVariant)
- -> std::pair<std::optional<SyncScope::ID>, Value *> {
- if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
- auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
- return std::make_pair(ScopeID, nullptr);
- }
-
- if (std::holds_alternative<StringRef>(ScopeVariant)) {
- auto ScopeName = std::get<StringRef>(ScopeVariant);
- SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
- return std::make_pair(ScopeID, nullptr);
- }
-
- auto *IntVal = std::get<Value *>(ScopeVariant);
- if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
- uint64_t ScopeVal = InstConst->getZExtValue();
- return std::make_pair(ScopeVal, IntVal);
+ virtual void prepareInst() {}
+
+ virtual Value *emitInst(bool IsWeak, SyncScope::ID Scope,
+ AtomicOrdering SuccessMemorder,
+ AtomicOrdering FailureMemorder) = 0;
+
+ Value *emitFailureMemorderSwitch(bool IsWeak, SyncScope::ID Scope,
+ AtomicOrdering SuccessMemorder) {
+ if (FailureMemorderConst) {
+ // FIXME: (from CGAtomic)
+ // 31.7.2.18: "The failure argument shall not be memory_order_release
+ // nor memory_order_acq_rel". Fallback to monotonic.
+ //
+ // Prior to c++17, "the failure argument shall be no stronger than the
+ // success argument". This condition has been lifted and the only
+ // precondition is 31.7.2.18. Effectively treat this as a DR and skip
+ // language version checks.
+ return emitInst(IsWeak, Scope, SuccessMemorder, *FailureMemorderConst);
}
- return std::make_pair(std::nullopt, IntVal);
+ Type *BoolTy = Builder.getInt1Ty();
+
+ // Create all the relevant BB's
+ BasicBlock *MonotonicBB = createBasicBlock("monotonic_fail");
+ BasicBlock *AcquireBB = createBasicBlock("acquire_fail");
+ BasicBlock *SeqCstBB = createBasicBlock("seqcst_fail");
+ BasicBlock *ContBB = createBasicBlock("atomic.continue");
+
+ // MonotonicBB is arbitrarily chosen as the default case; in practice,
+ // this doesn't matter unless someone is crazy enough to use something
+ // that doesn't fold to a constant for the ordering.
+ llvm::SwitchInst *SI =
+ Builder.CreateSwitch(FailureMemorderCABI, MonotonicBB);
+ // Implemented as acquire, since it's the closest in LLVM.
+ SI->addCase(
+ Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::consume)),
+ AcquireBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::acquire)),
+ AcquireBB);
+ SI->addCase(
+ Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::seq_cst)),
+ SeqCstBB);
+
+ // Emit all the different atomics
+ Builder.SetInsertPoint(MonotonicBB);
+ Value *MonotonicResult =
+ emitInst(IsWeak, Scope, SuccessMemorder, AtomicOrdering::Monotonic);
+ BasicBlock *MonotonicSourceBB = Builder.GetInsertBlock();
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(AcquireBB);
+ Value *AcquireResult =
+ emitInst(IsWeak, Scope, SuccessMemorder, AtomicOrdering::Acquire);
+ BasicBlock *AcquireSourceBB = Builder.GetInsertBlock();
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(SeqCstBB);
+ Value *SeqCstResult = emitInst(IsWeak, Scope, SuccessMemorder,
+ AtomicOrdering::SequentiallyConsistent);
+ BasicBlock *SeqCstSourceBB = Builder.GetInsertBlock();
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(ContBB);
+ PHINode *Result = Builder.CreatePHI(BoolTy, 3, Name + ".cmpxchg.success");
+ Result->addIncoming(MonotonicResult, MonotonicSourceBB);
+ Result->addIncoming(AcquireResult, AcquireSourceBB);
+ Result->addIncoming(SeqCstResult, SeqCstSourceBB);
+ return Result;
};
-
- // auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
- auto [MemorderConst, MemorderCABI] = processMemorder(Memorder);
- auto [ScopeConst, ScopeVal] = processScope(Scope);
-
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
- // a power of two greater than or equal to eight and less than or equal to a
- // target-specific size limit.
- bool CanUseAtomicLoadInst = PreferredSize <= MaxAtomicSizeSupported &&
- llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
- bool CanUseSingleAtomicLoadInst = CanUseAtomicLoadInst &&
- MemorderConst.has_value() // && IsWeakConst
- && ScopeConst;
- bool CanUseSizedLibcall =
- canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
- ScopeConst == SyncScope::System;
- bool CanUseLibcall = ScopeConst == SyncScope::System;
-
- Value *ExpectedVal;
- Value *DesiredVal;
-
- LoadInst *Val;
-
- // Emit load instruction, either as a single instruction, or as a case of a
- // per-constant switch.
- auto EmitAtomicStoreInst = [&](SyncScope::ID Scope, AtomicOrdering Memorder) {
- StoreInst *AtomicInst = Builder.CreateStore(Val, Ptr, IsVolatile);
- AtomicInst->setAtomic(Memorder, Scope);
- AtomicInst->setAlignment(EffectiveAlign);
- AtomicInst->setVolatile(IsVolatile);
+
+ Value *emitSuccessMemorderSwitch(bool IsWeak, SyncScope::ID Scope) {
+ if (SuccessMemorderConst)
+ return emitFailureMemorderSwitch(IsWeak, Scope, *SuccessMemorderConst);
+
+ Type *BoolTy = Builder.getInt1Ty();
+
+ // Create all the relevant BB's
+ BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
+ BasicBlock *AcquireBB = createBasicBlock(".acquire");
+ BasicBlock *ReleaseBB = createBasicBlock(".release");
+ BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
+ BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
+ BasicBlock *ContBB = createBasicBlock(".atomic.continue");
+
+ // Create the switch for the split
+ // MonotonicBB is arbitrarily chosen as the default case; in practice,
+ // this doesn't matter unless someone is crazy enough to use something
+ // that doesn't fold to a constant for the ordering.
+ Value *Order =
+ Builder.CreateIntCast(SuccessMemorderCABI, Builder.getInt32Ty(), false);
+ llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
+
+ Builder.SetInsertPoint(ContBB);
+ PHINode *Result = Builder.CreatePHI(BoolTy, 5, Name + ".cmpxchg.success");
+
+    // Emit all the different atomics
+    Builder.SetInsertPoint(MonotonicBB);
+    Value *MonotonicResult =
+        emitFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Monotonic);
+    Builder.CreateBr(ContBB);
+    // The PHI predecessor is the block that branches to ContBB, i.e. the
+    // current insert block (emitFailureMemorderSwitch may have created more
+    // blocks), not necessarily MonotonicBB itself.
+    Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
+
+    Builder.SetInsertPoint(AcquireBB);
+    Value *AcquireResult =
+        emitFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Acquire);
+    Builder.CreateBr(ContBB);
+    // Switch cases must target the entry block of each path, not whatever
+    // block the builder happens to end in after emitting the path.
+    SI->addCase(
+        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
+        AcquireBB);
+    SI->addCase(
+        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
+        AcquireBB);
+    Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
+
+    Builder.SetInsertPoint(ReleaseBB);
+    Value *ReleaseResult =
+        emitFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Release);
+    Builder.CreateBr(ContBB);
+    SI->addCase(
+        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
+        ReleaseBB);
+    Result->addIncoming(ReleaseResult, Builder.GetInsertBlock());
+
+    Builder.SetInsertPoint(AcqRelBB);
+    Value *AcqRelResult = emitFailureMemorderSwitch(
+        IsWeak, Scope, AtomicOrdering::AcquireRelease);
+    Builder.CreateBr(ContBB);
+    SI->addCase(
+        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
+        AcqRelBB);
+    Result->addIncoming(AcqRelResult, Builder.GetInsertBlock());
+
+    Builder.SetInsertPoint(SeqCstBB);
+    Value *SeqCstResult = emitFailureMemorderSwitch(
+        IsWeak, Scope, AtomicOrdering::SequentiallyConsistent);
+    Builder.CreateBr(ContBB);
+    SI->addCase(
+        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
+        SeqCstBB);
+    Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+
+ Builder.SetInsertPoint(Result->getNextNode());
+ return Result;
};
- if (CanUseSingleAtomicLoadInst && AllowInstruction) {
- Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
- return EmitAtomicStoreInst(*ScopeConst, *MemorderConst);
- }
+ Value *emitScopeSwitch(bool IsWeak) {
+ if (ScopeConst)
+ return emitSuccessMemorderSwitch(IsWeak, *ScopeConst);
- if (CanUseAtomicLoadInst && AllowSwitch && AllowInstruction) {
- Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
+ Type *BoolTy = Builder.getInt1Ty();
- auto createBasicBlock = [&](const Twine &BBName) {
- return BasicBlock::Create(Ctx, Name + BBName, CurFn);
- };
-
- auto GenMemorderSwitch = [&](SyncScope::ID Scope) {
- if (MemorderConst)
- return EmitAtomicStoreInst(Scope, *MemorderConst);
-
- // Create all the relevant BB's
- BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
- BasicBlock *AcquireBB = createBasicBlock(".acquire");
- BasicBlock *ReleaseBB = createBasicBlock(".release");
- BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
- BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
- BasicBlock *ContBB = createBasicBlock(".atomic.continue");
-
- // Create the switch for the split
- // MonotonicBB is arbitrarily chosen as the default case; in practice,
- // this doesn't matter unless someone is crazy enough to use something
- // that doesn't fold to a constant for the ordering.
- Value *Order =
- Builder.CreateIntCast(MemorderCABI, Builder.getInt32Ty(), false);
- llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
-
- Builder.SetInsertPoint(ContBB);
-
- // Emit all the different atomics
- Builder.SetInsertPoint(MonotonicBB);
- EmitAtomicStoreInst(Scope, AtomicOrdering::Monotonic);
- Builder.CreateBr(ContBB);
+ // Handle non-constant scope.
+ DenseMap<unsigned, BasicBlock *> BB;
+ for (const auto &S : SyncScopes) {
+ if (FallbackScope == S.second)
+ continue; // always the default case
+ BB[S.first] = createBasicBlock(Twine(".cmpxchg.scope.") + S.second);
+ }
- Builder.SetInsertPoint(AcquireBB);
- EmitAtomicStoreInst(Scope, AtomicOrdering::Acquire);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
- Builder.GetInsertBlock());
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
- Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(ReleaseBB);
- EmitAtomicStoreInst(Scope, AtomicOrdering::Release);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
- Builder.GetInsertBlock());
+ BasicBlock *DefaultBB = createBasicBlock(".cmpxchg.scope.fallback");
+ BasicBlock *ContBB = createBasicBlock(".cmpxchg.scope.continue");
- Builder.SetInsertPoint(AcqRelBB);
- EmitAtomicStoreInst(Scope, AtomicOrdering::AcquireRelease);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
- AcqRelBB);
+ Builder.SetInsertPoint(ContBB);
+ PHINode *Result = Builder.CreatePHI(BoolTy, SyncScopes.size() + 1,
+ Name + ".cmpxchg.success");
- Builder.SetInsertPoint(SeqCstBB);
- EmitAtomicStoreInst(Scope, AtomicOrdering::SequentiallyConsistent);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
- SeqCstBB);
-
- Builder.SetInsertPoint(ContBB);
- return;
- };
-
- auto GenScopeSwitch = [&]() {
- if (ScopeConst)
- return GenMemorderSwitch(*ScopeConst);
-
- // Handle non-constant scope.
- DenseMap<unsigned, BasicBlock *> BB;
- for (const auto &S : SyncScopes) {
- if (FallbackScope == S.second)
- continue; // always the default case
- BB[S.first] = createBasicBlock(Twine(".atomic.scope.") + S.second);
- }
+ Value *SC =
+ Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
+ /*IsSigned*/ false, Name + ".cmpxchg.scope.cast");
- BasicBlock *DefaultBB = createBasicBlock(".atomic.scope.fallback");
- BasicBlock *ContBB = createBasicBlock(".atomic.scope.continue");
-
- Builder.SetInsertPoint(ContBB);
-
- Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
- /*IsSigned=*/false,
- Name + ".atomic.scope.cast");
- // If unsupported synch scope is encountered at run time, assume a
- // fallback synch scope value.
- SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
- for (const auto &S : SyncScopes) {
- BasicBlock *B = BB[S.first];
- SI->addCase(Builder.getInt32(S.first), B);
-
- Builder.SetInsertPoint(B);
- SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
- GenMemorderSwitch(SyncScopeID);
- Builder.CreateBr(ContBB);
- }
+ // If unsupported synch scope is encountered at run time, assume a
+ // fallback synch scope value.
+ SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
+ for (const auto &S : SyncScopes) {
+ BasicBlock *B = BB[S.first];
+ SI->addCase(Builder.getInt32(S.first), B);
- Builder.SetInsertPoint(DefaultBB);
- SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
- GenMemorderSwitch(SyncScopeID);
+ Builder.SetInsertPoint(B);
+ SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
+ Value *SyncResult = emitSuccessMemorderSwitch(IsWeak, SyncScopeID);
+ Result->addIncoming(SyncResult, Builder.GetInsertBlock());
Builder.CreateBr(ContBB);
+ }
- Builder.SetInsertPoint(ContBB);
- return;
- };
-
- return GenScopeSwitch();
- }
-
- if (CanUseSizedLibcall && AllowSizedLibcall) {
- Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
- Value *StoreCall = emitAtomicStoreN(DataSizeConst, Ptr, Val, MemorderCABI,
- Builder, DL, TLI);
- StoreCall->setName(Name);
- if (StoreCall)
- return;
+ Builder.SetInsertPoint(DefaultBB);
+ SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
+ Value *DefaultResult = emitSuccessMemorderSwitch(IsWeak, SyncScopeID);
+ Result->addIncoming(DefaultResult, Builder.GetInsertBlock());
+ Builder.CreateBr(ContBB);
- // emitAtomiStoreN can return nullptr if the backend does not
- // support sized libcalls. Fall back to the non-sized libcall and remove the
- // unused load again.
- }
+ Builder.SetInsertPoint(Result->getNextNode());
+ return Result;
+ };
- if (CanUseLibcall && AllowLibcall) {
- // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
- // ignored. IsWeak is assumed to be false, Scope is assumed to be
- // SyncScope::System (strongest possible assumption synchronizing with
- // everything, instead of just a subset of sibling threads), and volatile
- // does not apply to function calls.
+ Value *emitWeakSwitch() {
+ if (IsWeakConst)
+ return emitScopeSwitch(*IsWeakConst);
+
+ Type *BoolTy = Builder.getInt1Ty();
+
+ // Create all the relevant BB's
+ BasicBlock *StrongBB = createBasicBlock(".cmpxchg.strong");
+ BasicBlock *WeakBB = createBasicBlock(".cmpxchg.weak");
+ BasicBlock *ContBB = createBasicBlock(".cmpxchg.continue");
+
+ // FIXME: Why is this a switch?
+ llvm::SwitchInst *SI = Builder.CreateSwitch(IsWeakVal, WeakBB);
+ SI->addCase(Builder.getInt1(false), StrongBB);
+
+ Builder.SetInsertPoint(StrongBB);
+ Value *StrongResult = emitScopeSwitch(false);
+ BasicBlock *StrongSourceBB = Builder.GetInsertBlock();
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(WeakBB);
+ Value *WeakResult = emitScopeSwitch(true);
+ BasicBlock *WeakSourceBB = Builder.GetInsertBlock();
+ Builder.CreateBr(ContBB);
+
+ Builder.SetInsertPoint(ContBB);
+ PHINode *Result =
+ Builder.CreatePHI(BoolTy, 2, Name + ".cmpxchg.isweak.success");
+ Result->addIncoming(WeakResult, WeakSourceBB);
+ Result->addIncoming(StrongResult, StrongSourceBB);
+ return Result;
+ };
- Value *DataSizeVal =
- ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
- Value *StoreCall = emitAtomicStore(DataSizeVal, Ptr, ValPtr, MemorderCABI,
- Builder, DL, TLI);
- if (StoreCall)
- return;
- }
+ virtual Expected<Value *> emitSizedLibcall() = 0;
- report_fatal_error(
- "__atomic_store builtin not supported by any available means");
-}
+ virtual Expected<Value *> emitLibcall() = 0;
-Value *llvm::emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
- std::variant<Value *, bool> IsWeak, bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
- FailureMemorder,
- std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
- Type *DataTy, std::optional<uint64_t> DataSize,
- std::optional<uint64_t> AvailableSize, MaybeAlign Align,
- IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const TargetLowering *TL,
- ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
- StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
- bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
- assert(Ptr->getType()->isPointerTy());
- assert(ExpectedPtr->getType()->isPointerTy());
- assert(DesiredPtr->getType()->isPointerTy());
- assert(TLI);
+ virtual Expected<Value *> makeFallbackError() = 0;
- LLVMContext &Ctx = Builder.getContext();
- Function *CurFn = Builder.GetInsertBlock()->getParent();
+ Expected<Value *> emit() {
+ assert(Ptr->getType()->isPointerTy());
+ // assert(ExpectedPtr->getType()->isPointerTy());
+ // assert(DesiredPtr->getType()->isPointerTy());
+ assert(TLI);
- unsigned MaxAtomicSizeSupported = 16;
- if (TL)
- MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
+ unsigned MaxAtomicSizeSupported = 16;
+ if (TL)
+ MaxAtomicSizeSupported = TL->getMaxAtomicSizeInBitsSupported() / 8;
- uint64_t DataSizeConst;
- if (DataSize) {
- DataSizeConst = *DataSize;
- } else {
- TypeSize DS = DL.getTypeStoreSize(DataTy);
- DataSizeConst = DS.getFixedValue();
- }
- uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
- assert(DataSizeConst <= AvailableSizeConst);
+ if (DataSize) {
+ DataSizeConst = *DataSize;
+ } else {
+ TypeSize DS = DL.getTypeStoreSize(DataTy);
+ DataSizeConst = DS.getFixedValue();
+ }
+ uint64_t AvailableSizeConst = AvailableSize.value_or(DataSizeConst);
+ assert(DataSizeConst <= AvailableSizeConst);
#ifndef NDEBUG
if (DataTy) {
@@ -754,14 +411,12 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
}
#endif
- Type *BoolTy = Builder.getInt1Ty();
Type *IntTy = getIntTy(Builder, TLI);
- uint64_t PreferredSize = PowerOf2Ceil(DataSizeConst);
+ PreferredSize = PowerOf2Ceil(DataSizeConst);
if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
PreferredSize = DataSizeConst;
- llvm::Align EffectiveAlign;
if (Align) {
EffectiveAlign = *Align;
} else {
@@ -779,14 +434,12 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
// Only use the original data type if it is compatible with cmpxchg (and sized
// libcall function) and matches the preferred size. No type punning needed
// for __atomic_compare_exchange which only takes pointers.
- Type *CoercedTy = nullptr;
if (DataTy && DataSizeConst == PreferredSize &&
(DataTy->isIntegerTy() || DataTy->isPointerTy()))
CoercedTy = DataTy;
else if (PreferredSize <= 16)
CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
- std::optional<AtomicOrdering> SuccessMemorderConst;
// For resolving the SuccessMemorder/FailureMemorder arguments. If it is
// constant, determine the AtomicOrdering for use with the cmpxchg
// instruction. Also determines the llvm::Value to be passed to
@@ -864,13 +517,12 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
return std::make_pair(std::nullopt, IntVal);
};
- Value *SuccessMemorderCABI;
- auto [IsWeakConst, IsWeakVal] = processIsWeak(IsWeak);
+ std::tie(IsWeakConst, IsWeakVal) = processIsWeak(IsWeak);
std::tie(SuccessMemorderConst, SuccessMemorderCABI) =
processMemorder(SuccessMemorder);
- auto [FailureMemorderConst, FailureMemorderCABI] =
+ std::tie(FailureMemorderConst, FailureMemorderCABI) =
processMemorder(FailureMemorder);
- auto [ScopeConst, ScopeVal] = processScope(Scope);
+ std::tie(ScopeConst, ScopeVal) = processScope(Scope);
// Fix malformed inputs. We do not want to emit illegal IR.
//
@@ -904,53 +556,20 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
// The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
// a power of two greater than or equal to eight and less than or equal to a
// target-specific size limit.
- bool CanUseCmpxchngInst = PreferredSize <= MaxAtomicSizeSupported &&
- llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
- bool CanUseSingleCmpxchngInst = CanUseCmpxchngInst && SuccessMemorderConst &&
- FailureMemorderConst && IsWeakConst &&
- ScopeConst;
+ bool CanUseInst = PreferredSize <= MaxAtomicSizeSupported &&
+ llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
+ bool CanUseSingleInst = CanUseInst && SuccessMemorderConst &&
+ FailureMemorderConst && IsWeakConst && ScopeConst;
bool CanUseSizedLibcall =
canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
ScopeConst == SyncScope::System;
bool CanUseLibcall = ScopeConst == SyncScope::System;
- Value *ExpectedVal;
- Value *DesiredVal;
-
- // Emit cmpxchg instruction, either as a single instruction, or as a case of a
- // per-constant switch.
- auto EmitCmpxchngInst = [&](bool IsWeak, SyncScope::ID Scope,
- AtomicOrdering SuccessMemorder,
- AtomicOrdering FailureMemorder) {
- AtomicCmpXchgInst *AtomicInst =
- Builder.CreateAtomicCmpXchg(Ptr, ExpectedVal, DesiredVal, Align,
- SuccessMemorder, FailureMemorder, Scope);
- AtomicInst->setName(Name + ".cmpxchg.pair");
- AtomicInst->setAlignment(EffectiveAlign);
- AtomicInst->setWeak(IsWeak);
- AtomicInst->setVolatile(IsVolatile);
-
- if (PrevPtr) {
- Value *PreviousVal = Builder.CreateExtractValue(AtomicInst, /*Idxs=*/0,
- Name + ".cmpxchg.prev");
- Builder.CreateStore(PreviousVal, PrevPtr);
- }
-
- Value *SuccessFailureVal = Builder.CreateExtractValue(
- AtomicInst, /*Idxs=*/1, Name + ".cmpxchg.success");
-
- assert(SuccessFailureVal->getType()->isIntegerTy(1));
- return SuccessFailureVal;
- };
+ if (CanUseSingleInst && AllowInstruction) {
+ prepareInst();
- if (CanUseSingleCmpxchngInst && AllowInstruction) {
- // FIXME: Need to get alignment correct
- ExpectedVal =
- Builder.CreateLoad(CoercedTy, ExpectedPtr, Name + ".cmpxchg.expected");
- DesiredVal =
- Builder.CreateLoad(CoercedTy, DesiredPtr, Name + ".cmpxchg.desired");
- return EmitCmpxchngInst(*IsWeakConst, *ScopeConst, *SuccessMemorderConst,
- *FailureMemorderConst);
+ return emitInst(*IsWeakConst, *ScopeConst, *SuccessMemorderConst,
+ *FailureMemorderConst);
}
// Switching only needed for cmpxchg instruction which requires constant
@@ -959,237 +578,226 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
// the given target, it will also generate a call to the
// __atomic_compare_exchange function. In that case the switching was very
// unnecessary but cannot be undone.
- if (CanUseCmpxchngInst && AllowSwitch && AllowInstruction) {
- auto createBasicBlock = [&](const Twine &BBName) {
- return BasicBlock::Create(Ctx, Name + BBName, CurFn);
- };
+ if (CanUseInst && AllowSwitch && AllowInstruction) {
+ prepareInst();
+ return emitWeakSwitch();
+ }
- ExpectedVal =
- Builder.CreateLoad(CoercedTy, ExpectedPtr, Name + ".cmpxchg.expected");
- DesiredVal =
- Builder.CreateLoad(CoercedTy, DesiredPtr, Name + ".cmpxchg.desired");
+ // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile
+ // does not apply to function calls.
- auto GenFailureMemorderSwitch =
- [&](bool IsWeak, SyncScope::ID Scope,
- AtomicOrdering SuccessMemorder) -> Value * {
- if (FailureMemorderConst) {
- // FIXME: (from CGAtomic)
- // 31.7.2.18: "The failure argument shall not be memory_order_release
- // nor memory_order_acq_rel". Fallback to monotonic.
- //
- // Prior to c++17, "the failure argument shall be no stronger than the
- // success argument". This condition has been lifted and the only
- // precondition is 31.7.2.18. Effectively treat this as a DR and skip
- // language version checks.
- return EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
- *FailureMemorderConst);
- }
+  if (CanUseSizedLibcall && AllowSizedLibcall) {
+    Expected<Value *> SizedLibcallResult = emitSizedLibcall();
+    if (SizedLibcallResult)
+      return SizedLibcallResult;
+    // A failed Expected must not be destroyed unchecked; we still have the
+    // non-sized libcall fallback below, so consume the error here.
+    consumeError(SizedLibcallResult.takeError());
+  }
- // Create all the relevant BB's
- BasicBlock *MonotonicBB = createBasicBlock("monotonic_fail");
- BasicBlock *AcquireBB = createBasicBlock("acquire_fail");
- BasicBlock *SeqCstBB = createBasicBlock("seqcst_fail");
- BasicBlock *ContBB = createBasicBlock("atomic.continue");
-
- // MonotonicBB is arbitrarily chosen as the default case; in practice,
- // this doesn't matter unless someone is crazy enough to use something
- // that doesn't fold to a constant for the ordering.
- llvm::SwitchInst *SI =
- Builder.CreateSwitch(FailureMemorderCABI, MonotonicBB);
- // Implemented as acquire, since it's the closest in LLVM.
- SI->addCase(
- Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::consume)),
- AcquireBB);
- SI->addCase(
- Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::acquire)),
- AcquireBB);
- SI->addCase(
- Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::seq_cst)),
- SeqCstBB);
-
- // Emit all the different atomics
- Builder.SetInsertPoint(MonotonicBB);
- Value *MonotonicResult = EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
- AtomicOrdering::Monotonic);
- BasicBlock *MonotonicSourceBB = Builder.GetInsertBlock();
- Builder.CreateBr(ContBB);
+  if (CanUseLibcall && AllowLibcall) {
+    // This fallback must use the generic (non-sized) libcall emitter; calling
+    // emitSizedLibcall() again here was a copy-paste error — it already failed
+    // above, and emitLibcall() would otherwise be dead code.
+    Expected<Value *> LibcallResult = emitLibcall();
+    if (LibcallResult)
+      return LibcallResult;
+    consumeError(LibcallResult.takeError());
+  }
- Builder.SetInsertPoint(AcquireBB);
- Value *AcquireResult = EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
- AtomicOrdering::Acquire);
- BasicBlock *AcquireSourceBB = Builder.GetInsertBlock();
- Builder.CreateBr(ContBB);
+ return makeFallbackError();
+ }
+};
- Builder.SetInsertPoint(SeqCstBB);
- Value *SeqCstResult =
- EmitCmpxchngInst(IsWeak, Scope, SuccessMemorder,
- AtomicOrdering::SequentiallyConsistent);
- BasicBlock *SeqCstSourceBB = Builder.GetInsertBlock();
- Builder.CreateBr(ContBB);
+class AtomicLoadEmitter final : public AtomicEmitter {
+public:
+ using AtomicEmitter::AtomicEmitter;
- Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(BoolTy, 3, Name + ".cmpxchg.success");
- Result->addIncoming(MonotonicResult, MonotonicSourceBB);
- Result->addIncoming(AcquireResult, AcquireSourceBB);
- Result->addIncoming(SeqCstResult, SeqCstSourceBB);
- return Result;
- };
-
- auto GenSuccessMemorderSwitch = [&](bool IsWeak,
- SyncScope::ID Scope) -> Value * {
- if (SuccessMemorderConst)
- return GenFailureMemorderSwitch(IsWeak, Scope, *SuccessMemorderConst);
-
- // Create all the relevant BB's
- BasicBlock *MonotonicBB = createBasicBlock(".monotonic");
- BasicBlock *AcquireBB = createBasicBlock(".acquire");
- BasicBlock *ReleaseBB = createBasicBlock(".release");
- BasicBlock *AcqRelBB = createBasicBlock(".acqrel");
- BasicBlock *SeqCstBB = createBasicBlock(".seqcst");
- BasicBlock *ContBB = createBasicBlock(".atomic.continue");
-
- // Create the switch for the split
- // MonotonicBB is arbitrarily chosen as the default case; in practice,
- // this doesn't matter unless someone is crazy enough to use something
- // that doesn't fold to a constant for the ordering.
- Value *Order = Builder.CreateIntCast(SuccessMemorderCABI,
- Builder.getInt32Ty(), false);
- llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
-
- Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(BoolTy, 5, Name + ".cmpxchg.success");
-
- // Emit all the different atomics
- Builder.SetInsertPoint(MonotonicBB);
- Value *MonotonicResult =
- GenFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Monotonic);
- Result->addIncoming(MonotonicResult, MonotonicBB);
- Builder.CreateBr(ContBB);
+ Error emitLoad(Value *RetPtr) {
+ assert(RetPtr->getType()->isPointerTy());
+ this->RetPtr = RetPtr;
+ return emit().takeError();
+ }
- Builder.SetInsertPoint(AcquireBB);
- Value *AcquireResult =
- GenFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Acquire);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
- Builder.GetInsertBlock());
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
- Builder.GetInsertBlock());
- Result->addIncoming(AcquireResult, AcquireBB);
-
- Builder.SetInsertPoint(ReleaseBB);
- Value *ReleaseResult =
- GenFailureMemorderSwitch(IsWeak, Scope, AtomicOrdering::Release);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
- Builder.GetInsertBlock());
- Result->addIncoming(ReleaseResult, Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(AcqRelBB);
- Value *AcqRelResult = GenFailureMemorderSwitch(
- IsWeak, Scope, AtomicOrdering::AcquireRelease);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
- AcqRelBB);
- Result->addIncoming(AcqRelResult, Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(SeqCstBB);
- Value *SeqCstResult = GenFailureMemorderSwitch(
- IsWeak, Scope, AtomicOrdering::SequentiallyConsistent);
- Builder.CreateBr(ContBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
- SeqCstBB);
- Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(Result->getNextNode());
- return Result;
- };
-
- auto GenScopeSwitch = [&](bool IsWeak) -> Value * {
- if (ScopeConst)
- return GenSuccessMemorderSwitch(IsWeak, *ScopeConst);
-
- // Handle non-constant scope.
- DenseMap<unsigned, BasicBlock *> BB;
- for (const auto &S : SyncScopes) {
- if (FallbackScope == S.second)
- continue; // always the default case
- BB[S.first] = createBasicBlock(Twine(".cmpxchg.scope.") + S.second);
- }
+protected:
+ Value *RetPtr;
- BasicBlock *DefaultBB = createBasicBlock(".cmpxchg.scope.fallback");
- BasicBlock *ContBB = createBasicBlock(".cmpxchg.scope.continue");
-
- Builder.SetInsertPoint(ContBB);
- PHINode *Result = Builder.CreatePHI(BoolTy, SyncScopes.size() + 1,
- Name + ".cmpxchg.success");
-
- Value *SC = Builder.CreateIntCast(ScopeVal, Builder.getInt32Ty(),
- /*IsSigned*/ false,
- Name + ".cmpxchg.scope.cast");
- // If unsupported synch scope is encountered at run time, assume a
- // fallback synch scope value.
- SwitchInst *SI = Builder.CreateSwitch(SC, DefaultBB);
- for (const auto &S : SyncScopes) {
- BasicBlock *B = BB[S.first];
- SI->addCase(Builder.getInt32(S.first), B);
-
- Builder.SetInsertPoint(B);
- SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(S.second);
- Value *SyncResult = GenSuccessMemorderSwitch(IsWeak, SyncScopeID);
- Result->addIncoming(SyncResult, Builder.GetInsertBlock());
- Builder.CreateBr(ContBB);
- }
+ Value *emitInst(bool IsWeak, SyncScope::ID Scope,
+ AtomicOrdering SuccessMemorder,
+ AtomicOrdering FailureMemorder) override {
+ LoadInst *AtomicInst =
+ Builder.CreateLoad(CoercedTy, Ptr, IsVolatile, Name + ".atomic.load");
+ AtomicInst->setAtomic(SuccessMemorder, Scope);
+ AtomicInst->setAlignment(EffectiveAlign);
+ AtomicInst->setVolatile(IsVolatile);
- Builder.SetInsertPoint(DefaultBB);
- SyncScope::ID SyncScopeID = Ctx.getOrInsertSyncScopeID(FallbackScope);
- Value *DefaultResult = GenSuccessMemorderSwitch(IsWeak, SyncScopeID);
- Result->addIncoming(DefaultResult, Builder.GetInsertBlock());
- Builder.CreateBr(ContBB);
+ // Store loaded result to where the caller expects it.
+ // FIXME: Do we need to zero the padding, if any?
+ Builder.CreateStore(AtomicInst, RetPtr, IsVolatile);
+ return nullptr;
+ }
+
+ Expected<Value *> emitSizedLibcall() override {
+    Value *LoadResult = emitAtomicLoadN(PreferredSize, Ptr, SuccessMemorderCABI,
+                                        Builder, DL, TLI);
+    // emitAtomicLoadN may return nullptr; only touch the result once we know
+    // the call was actually emitted (the setName call previously ran before
+    // the null check, dereferencing a potential nullptr).
+    if (LoadResult) {
+      LoadResult->setName(Name);
+      Builder.CreateStore(LoadResult, RetPtr);
+      return nullptr;
+    }
+
+ // emitAtomicLoadN can return nullptr if the backend does not
+ // support sized libcalls. Fall back to the non-sized libcall and remove the
+ // unused load again.
+ return make_error<StringError>("__atomic_load_N libcall absent",
+ inconvertibleErrorCode());
+ }
- Builder.SetInsertPoint(Result->getNextNode());
- return Result;
- };
+ Expected<Value *> emitLibcall() override {
+ // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile
+ // does not apply to function calls.
- auto GenWeakSwitch = [&]() -> Value * {
- if (IsWeakConst)
- return GenScopeSwitch(*IsWeakConst);
+ Value *DataSizeVal =
+ ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
+ Value *LoadCall = emitAtomicLoad(DataSizeVal, Ptr, RetPtr,
+ SuccessMemorderCABI, Builder, DL, TLI);
+ if (LoadCall) {
+ LoadCall->setName(Name);
+ return nullptr;
+ }
- // Create all the relevant BB's
- BasicBlock *StrongBB = createBasicBlock(".cmpxchg.strong");
- BasicBlock *WeakBB = createBasicBlock(".cmpxchg.weak");
- BasicBlock *ContBB = createBasicBlock(".cmpxchg.continue");
+ return make_error<StringError>("__atomic_load libcall absent",
+ inconvertibleErrorCode());
+ }
- // FIXME: Why is this a switch?
- llvm::SwitchInst *SI = Builder.CreateSwitch(IsWeakVal, WeakBB);
- SI->addCase(Builder.getInt1(false), StrongBB);
+ Expected<Value *> makeFallbackError() override {
+ return make_error<StringError>(
+        "__atomic_load builtin not supported by any available means",
+ inconvertibleErrorCode());
+ }
+};
- Builder.SetInsertPoint(StrongBB);
- Value *StrongResult = GenScopeSwitch(false);
- BasicBlock *StrongSourceBB = Builder.GetInsertBlock();
- Builder.CreateBr(ContBB);
+class AtomicStoreEmitter final : public AtomicEmitter {
+public:
+ using AtomicEmitter::AtomicEmitter;
- Builder.SetInsertPoint(WeakBB);
- Value *WeakResult = GenScopeSwitch(true);
- BasicBlock *WeakSourceBB = Builder.GetInsertBlock();
- Builder.CreateBr(ContBB);
+ Error emitStore(Value *ValPtr) {
+ assert(ValPtr->getType()->isPointerTy());
+ this->ValPtr = ValPtr;
+ return emit().takeError();
+ }
- Builder.SetInsertPoint(ContBB);
- PHINode *Result =
- Builder.CreatePHI(BoolTy, 2, Name + ".cmpxchg.isweak.success");
- Result->addIncoming(WeakResult, WeakSourceBB);
- Result->addIncoming(StrongResult, StrongSourceBB);
- return Result;
- };
+protected:
+ Value *ValPtr;
+ Value *Val;
- return GenWeakSwitch();
+ void prepareInst() override {
+ Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
}
- if (CanUseSizedLibcall && AllowSizedLibcall) {
+ Value *emitInst(bool IsWeak, SyncScope::ID Scope,
+ AtomicOrdering SuccessMemorder,
+ AtomicOrdering FailureMemorder) override {
+ StoreInst *AtomicInst = Builder.CreateStore(Val, Ptr, IsVolatile);
+ AtomicInst->setAtomic(SuccessMemorder, Scope);
+ AtomicInst->setAlignment(EffectiveAlign);
+ AtomicInst->setVolatile(IsVolatile);
+ return nullptr;
+ }
+
+ Expected<Value *> emitSizedLibcall() override {
+    Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
+    Value *StoreCall = emitAtomicStoreN(DataSizeConst, Ptr, Val,
+                                        SuccessMemorderCABI, Builder, DL, TLI);
+    // emitAtomicStoreN may return nullptr (see comment below); check before
+    // naming the call — setName on a nullptr would crash.
+    if (StoreCall) {
+      StoreCall->setName(Name);
+      return nullptr;
+    }
+
+    // emitAtomicStoreN can return nullptr if the backend does not
+    // support sized libcalls. Fall back to the non-sized libcall and remove the
+    // unused load again.
+ return make_error<StringError>("__atomic_store_N libcall absent",
+ inconvertibleErrorCode());
+ }
+
+ Expected<Value *> emitLibcall() override {
+ // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile
+ // does not apply to function calls.
+
+ Value *DataSizeVal =
+ ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
+ Value *StoreCall = emitAtomicStore(DataSizeVal, Ptr, ValPtr,
+ SuccessMemorderCABI, Builder, DL, TLI);
+ if (StoreCall)
+ return nullptr;
+
+ return make_error<StringError>("__atomic_store libcall absent",
+ inconvertibleErrorCode());
+ }
+
+ Expected<Value *> makeFallbackError() override {
+ return make_error<StringError>(
+ "__atomic_store builtin not supported by any available means",
+ inconvertibleErrorCode());
+ }
+};
+
+class AtomicCompareExchangeEmitter final : public AtomicEmitter {
+public:
+ using AtomicEmitter::AtomicEmitter;
+
+ Expected<Value *> emitCmpXchg(Value *ExpectedPtr, Value *DesiredPtr,
+ Value *PrevPtr) {
+ assert(ExpectedPtr->getType()->isPointerTy());
+ assert(DesiredPtr->getType()->isPointerTy());
+ assert(!PrevPtr || PrevPtr->getType()->isPointerTy());
+
+ this->ExpectedPtr = ExpectedPtr;
+ this->DesiredPtr = DesiredPtr;
+ this->PrevPtr = PrevPtr;
+ return emit();
+ }
+
+protected:
+ Value *ExpectedPtr;
+ Value *DesiredPtr;
+ Value *PrevPtr;
+ Value *ExpectedVal;
+ Value *DesiredVal;
+
+ void prepareInst() override {
+ ExpectedVal =
+ Builder.CreateLoad(CoercedTy, ExpectedPtr, Name + ".cmpxchg.expected");
+ DesiredVal =
+ Builder.CreateLoad(CoercedTy, DesiredPtr, Name + ".cmpxchg.desired");
+ }
+
+ Value *emitInst(bool IsWeak, SyncScope::ID Scope,
+ AtomicOrdering SuccessMemorder,
+ AtomicOrdering FailureMemorder) override {
+ AtomicCmpXchgInst *AtomicInst =
+ Builder.CreateAtomicCmpXchg(Ptr, ExpectedVal, DesiredVal, Align,
+ SuccessMemorder, FailureMemorder, Scope);
+ AtomicInst->setName(Name + ".cmpxchg.pair");
+ AtomicInst->setAlignment(EffectiveAlign);
+ AtomicInst->setWeak(IsWeak);
+ AtomicInst->setVolatile(IsVolatile);
+
+ if (PrevPtr) {
+ Value *PreviousVal = Builder.CreateExtractValue(AtomicInst, /*Idxs=*/0,
+ Name + ".cmpxchg.prev");
+ Builder.CreateStore(PreviousVal, PrevPtr);
+ }
+ Value *SuccessFailureVal = Builder.CreateExtractValue(
+ AtomicInst, /*Idxs=*/1, Name + ".cmpxchg.success");
+
+ assert(SuccessFailureVal->getType()->isIntegerTy(1));
+ return SuccessFailureVal;
+ }
+
+ Expected<Value *> emitSizedLibcall() override {
LoadInst *DesiredVal =
Builder.CreateLoad(IntegerType::get(Ctx, PreferredSize * 8), DesiredPtr,
Name + ".cmpxchg.desired");
@@ -1210,15 +818,11 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
// support sized libcalls. Fall back to the non-sized libcall and remove the
// unused load again.
DesiredVal->eraseFromParent();
+ return make_error<StringError>("__atomic_compare_exchange_N libcall absent",
+ inconvertibleErrorCode());
}
- if (CanUseLibcall && AllowLibcall) {
- // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
- // ignored. IsWeak is assumed to be false, Scope is assumed to be
- // SyncScope::System (strongest possible assumption synchronizing with
- // everything, instead of just a subset of sibling threads), and volatile
- // does not apply to function calls.
-
+ Expected<Value *> emitLibcall() override {
+ // FIXME: Some AMDGCN regression tests check the addrspace, but
+ // __atomic_compare_exchange by definition is addrspace(0) and
+ // emitAtomicCompareExchange will complain about it.
@@ -1247,13 +851,82 @@ Value *llvm::emitAtomicCompareExchangeBuiltin(
Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
return SuccessBool;
}
+
+ return make_error<StringError>("__atomic_compare_exchange libcall absent",
+ inconvertibleErrorCode());
}
- report_fatal_error(
- "__atomic_compare_exchange builtin not supported by any available means");
+ Expected<Value *> makeFallbackError() override {
+ return make_error<StringError>("__atomic_compare_exchange builtin not "
+ "supported by any available means",
+ inconvertibleErrorCode());
+ }
+};
+
+} // namespace
+
+Error llvm::emitAtomicLoadBuiltin(
+ Value *Ptr, Value *RetPtr,
+ // std::variant<Value *, bool> IsWeak,
+ bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
+ AtomicLoadEmitter Emitter(
+ Ptr, false, IsVolatile, Memorder, {}, Scope, DataTy, DataSize,
+ AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes, FallbackScope,
+ Name, AllowInstruction, AllowSwitch, AllowSizedLibcall, AllowLibcall);
+ return Emitter.emitLoad(RetPtr);
+}
+
+Error llvm::emitAtomicStoreBuiltin(
+ Value *Ptr, Value *ValPtr,
+ // std::variant<Value *, bool> IsWeak,
+ bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
+
+ AtomicStoreEmitter Emitter(
+ Ptr, false, IsVolatile, Memorder, {}, Scope, DataTy, DataSize,
+ AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes, FallbackScope,
+ Name, AllowInstruction, AllowSwitch, AllowSizedLibcall, AllowLibcall);
+ return Emitter.emitStore(ValPtr);
+}
+
+Expected<Value *> llvm::emitAtomicCompareExchangeBuiltin(
+ Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ std::variant<Value *, bool> IsWeak, bool IsVolatile,
+ std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
+ std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
+ FailureMemorder,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Value *PrevPtr,
+ Type *DataTy, std::optional<uint64_t> DataSize,
+ std::optional<uint64_t> AvailableSize, MaybeAlign Align,
+ IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const TargetLowering *TL,
+ ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
+ StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
+ bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
+ AtomicCompareExchangeEmitter Emitter(
+ Ptr, IsWeak, IsVolatile, SuccessMemorder, FailureMemorder, Scope, DataTy,
+ DataSize, AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes,
+ FallbackScope, Name, AllowInstruction, AllowSwitch, AllowSizedLibcall,
+ AllowLibcall);
+ return Emitter.emitCmpXchg(ExpectedPtr, DesiredPtr, PrevPtr);
}
-Value *llvm::emitAtomicCompareExchangeBuiltin(
+Expected<Value *> llvm::emitAtomicCompareExchangeBuiltin(
Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index dae14601f6667b..8ae904367165a2 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -3762,22 +3762,25 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
- Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterReadIP =
+ OMPBuilder.createAtomicRead(Loc, X, V, AO);
+ EXPECT_TRUE((bool)AfterReadIP);
+ Builder.restoreIP(*AfterReadIP);
- IntegerType *IntCastTy =
- IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
+ // IntegerType *IntCastTy = IntegerType::get(M->getContext(),
+ // Float32->getScalarSizeInBits());
LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
EXPECT_TRUE(AtomicLoad->isAtomic());
EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
- BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
- EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
- EXPECT_EQ(CastToFlt->getDestTy(), Float32);
- EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
+ // BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
+ // EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
+ // EXPECT_EQ(CastToFlt->getDestTy(), Float32);
+ // EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
- StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
- EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
+ StoreInst *StoreofAtomic = cast<StoreInst>(AtomicLoad->getNextNode());
+ EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
Builder.CreateRetVoid();
@@ -3804,7 +3807,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
BasicBlock *EntryBB = BB;
- Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterReadIP =
+ OMPBuilder.createAtomicRead(Loc, X, V, AO);
+ EXPECT_TRUE((bool)AfterReadIP);
+ Builder.restoreIP(*AfterReadIP);
LoadInst *AtomicLoad = nullptr;
StoreInst *StoreofAtomic = nullptr;
@@ -3852,8 +3858,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
AtomicOrdering AO = AtomicOrdering::Monotonic;
Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
- Builder.restoreIP(
- OMPBuilder.createAtomicWrite(Loc, AllocaIP, X, ValToWrite, AO));
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterWriteIP =
+ OMPBuilder.createAtomicWrite(Loc, AllocaIP, X, ValToWrite, AO);
+ EXPECT_TRUE((bool)AfterWriteIP);
+ Builder.restoreIP(*AfterWriteIP);
IntegerType *IntCastTy =
IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
@@ -3890,8 +3898,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
BasicBlock *EntryBB = BB;
- Builder.restoreIP(
- OMPBuilder.createAtomicWrite(Loc, AllocaIP, X, ValToWrite, AO));
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterWriteIP =
+ OMPBuilder.createAtomicWrite(Loc, AllocaIP, X, ValToWrite, AO);
+ EXPECT_TRUE((bool)AfterWriteIP);
+ Builder.restoreIP(*AfterWriteIP);
StoreInst *StoreofAtomic = nullptr;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index dca29f55661b0c..643f3f8793d877 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2204,7 +2204,12 @@ convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
- builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
+ auto ContIP = ompBuilder->createAtomicRead(ompLoc, X, V, AO);
+ if (!ContIP)
+ return opInst.emitError(toString(
+ ContIP.takeError())); // Contains either an error code or a string
+ // message; we should be able to extract it.
+ builder.restoreIP(*ContIP);
return success();
}
@@ -2225,7 +2230,11 @@ convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
/*isVolatile=*/false};
- builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
+ auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+ auto contIP = ompBuilder->createAtomicWrite(ompLoc, allocaIP, x, expr, ao);
+ if (!contIP)
+ opInst.emitError(toString(contIP.takeError()));
+ builder.restoreIP(*contIP);
return success();
}
>From 9d86ff64026656b90e73095113f3fd5066ad289f Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 12 Nov 2024 15:53:25 +0100
Subject: [PATCH 07/17] Fixing unittests
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 +-
.../Frontend/OpenMPIRBuilderTest.cpp | 139 ++++++++++++++----
2 files changed, 115 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 5b9264cfd189be..3eb600b6947a08 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8262,12 +8262,11 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
return std::move(ALResult);
// Create new CFG.
- BasicBlock *ContBB = splitBB(Builder, true, X->getName() + ".atomic.cont");
- BasicBlock *ExitBB = splitBB(Builder, false, X->getName() + ".atomic.exit");
- InsertPointTy ContIP = Builder.saveIP();
+ BasicBlock *DoneBB = splitBB(Builder, false, X->getName() + ".atomic.done");
+ BasicBlock *RetryBB = splitBB(Builder, true, X->getName() + ".atomic.retry");
// Emit the update transaction...
- Builder.SetInsertPoint(ContBB);
+ Builder.SetInsertPoint(RetryBB);
// 1. Let the user code compute the new value.
// FIXME: This should not be done by-value, as the type might be unreasonable
@@ -8303,10 +8302,10 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
Value *Success = *ACEResult;
// 3. Repeat transaction until successful.
- Builder.CreateCondBr(Success, ExitBB, ContBB);
+ Builder.CreateCondBr(Success, DoneBB, RetryBB);
// Continue when the update transaction was successful.
- Builder.restoreIP(ContIP);
+ Builder.SetInsertPoint(DoneBB);
return std::make_pair(OrigVal, UpdVal);
}
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 8ae904367165a2..110efb6f32a784 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -43,6 +43,8 @@ using namespace omp;
namespace {
+
+
/// Create an instruction that uses the values in \p Values. We use "printf"
/// just because it is often used for this purpose in test code, but it is never
/// executed here.
@@ -174,6 +176,93 @@ static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
}
}
+
+static Value * followStoreLoad(Instruction *I, Value *V) {
+ while (true) {
+ Value *Addr;
+ while (true) {
+ I= I->getPrevNode();
+ if (!I)
+ return V;
+ if (!isa<LoadInst>(I))
+ continue ;
+ auto LoadI = cast<LoadInst>(I);
+ if (LoadI != V)
+ continue ;
+ Addr = LoadI->getPointerOperand();
+ V =nullptr;
+ break;
+ }
+
+ while (true) {
+ I = I->getPrevNode();
+ if (!I)
+ return V;
+ if (!isa<StoreInst>(I))
+ continue ;
+ auto StoreI = cast<StoreInst>(I);
+ if (StoreI->getPointerOperand() != Addr )
+ continue ;
+ V = StoreI->getValueOperand();
+ break;
+ }
+ }
+}
+
+
+static Value * followStorePtr(Value *Addr) {
+ Value *V = nullptr;
+ // while (true) {
+ if (!isa<Instruction>(Addr))
+ return V;
+ auto Alloca = cast<Instruction>(Addr);
+ auto LDUse = [&]() -> LoadInst *{
+ for (auto &&U : Alloca->uses())
+ if (auto LD = dyn_cast<LoadInst>(U.getUser()))
+ if (LD->getPointerOperand() == Alloca)
+ return LD;
+ return nullptr;
+ }();
+ if (!LDUse)
+ return V;
+
+
+
+ auto STUse = [](Instruction *Addr) -> StoreInst *{
+ for (auto &&U : Addr->uses())
+ if (auto ST = dyn_cast<StoreInst>(U.getUser()))
+ if (ST->getPointerOperand() == Addr && !isa<LoadInst>( ST->getValueOperand()))
+ return ST;
+ return nullptr;
+ }(Alloca);
+ return STUse;
+
+ // }
+}
+
+static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
+ StoreInst *StoreofAtomic=nullptr;
+ for (Instruction &Cur : *EntryBB) {
+ if (isa<StoreInst>(Cur)) {
+ StoreofAtomic = cast<StoreInst>(&Cur);
+ if (StoreofAtomic->getPointerOperand() == XVal)
+ continue;
+ StoreofAtomic = nullptr;
+ }
+ }
+ return StoreofAtomic;
+}
+
+template <typename T> static T* findLastInstInBB(BasicBlock *BB) {
+ for (Instruction &Cur : reverse(*BB)) {
+ if (T *Candidate = dyn_cast<T>(&Cur))
+ return Candidate;
+ }
+ return nullptr;
+}
+
+
+
class OpenMPIRBuilderTest : public testing::Test {
protected:
void SetUp() override {
@@ -3863,13 +3952,12 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
EXPECT_TRUE((bool)AfterWriteIP);
Builder.restoreIP(*AfterWriteIP);
- IntegerType *IntCastTy =
- IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
+// IntegerType *IntCastTy = IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
- Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
+ // Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
- StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode());
- EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
+StoreInst *StoreofAtomic = findAtomicInst(OMPBuilder.getInsertionPoint().getBlock(), XVal);
+ EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()), ValToWrite);
EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
EXPECT_TRUE(StoreofAtomic->isAtomic());
@@ -3903,20 +3991,13 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
EXPECT_TRUE((bool)AfterWriteIP);
Builder.restoreIP(*AfterWriteIP);
- StoreInst *StoreofAtomic = nullptr;
+ StoreInst *StoreofAtomic = findAtomicInst(EntryBB, XVal);
+
- for (Instruction &Cur : *EntryBB) {
- if (isa<StoreInst>(Cur)) {
- StoreofAtomic = cast<StoreInst>(&Cur);
- if (StoreofAtomic->getPointerOperand() == XVal)
- continue;
- StoreofAtomic = nullptr;
- }
- }
EXPECT_NE(StoreofAtomic, nullptr);
EXPECT_TRUE(StoreofAtomic->isAtomic());
- EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
+ EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()), ValToWrite);
Builder.CreateRetVoid();
OMPBuilder.finalize();
@@ -3934,7 +4015,8 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
IntegerType *Int32 = Type::getInt32Ty(M->getContext());
AllocaInst *XVal = Builder.CreateAlloca(Int32);
XVal->setName("AtomicVar");
- Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
+ auto ExpectedVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0U);
+ Builder.CreateStore(ExpectedVal, XVal);
OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
@@ -3951,11 +4033,11 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
Sub = IRB.CreateSub(ConstVal, Atomic);
return Sub;
};
- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(
- Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
assert(AfterIP && "unexpected error");
Builder.restoreIP(*AfterIP);
- BasicBlock *ContBB = EntryBB->getSingleSuccessor();
+ BasicBlock *DoneBB = Builder.GetInsertBlock();
+ BasicBlock *ContBB = DoneBB->getSinglePredecessor();
BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
EXPECT_NE(ContTI, nullptr);
BasicBlock *EndBB = ContTI->getSuccessor(0);
@@ -3963,24 +4045,23 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- EXPECT_NE(Phi, nullptr);
- EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
+ //PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
+ //EXPECT_NE(Phi, nullptr);
+ //EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
+ //EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
+ //EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
- ExtractValueInst *ExVI1 =
- dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
+ ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
EXPECT_NE(ExVI1, nullptr);
- AtomicCmpXchgInst *CmpExchg =
- dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
+ AtomicCmpXchgInst *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
+ EXPECT_EQ(followStorePtr(CmpExchg->getCompareOperand()), ExpectedVal);
+ EXPECT_EQ(followStorePtr(CmpExchg->getNewValOperand()), Sub);
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
>From dcced9ae3132ecdd1bd6b68160b849dce95ad8a1 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 12 Nov 2024 17:32:53 +0100
Subject: [PATCH 08/17] Fix unittest
---
.../Frontend/OpenMPIRBuilderTest.cpp | 108 +++++++++++-------
1 file changed, 69 insertions(+), 39 deletions(-)
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 110efb6f32a784..f3a779bdcc3cb6 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -210,24 +210,55 @@ static Value * followStoreLoad(Instruction *I, Value *V) {
}
-static Value * followStorePtr(Value *Addr) {
- Value *V = nullptr;
- // while (true) {
- if (!isa<Instruction>(Addr))
- return V;
- auto Alloca = cast<Instruction>(Addr);
- auto LDUse = [&]() -> LoadInst *{
- for (auto &&U : Alloca->uses())
- if (auto LD = dyn_cast<LoadInst>(U.getUser()))
- if (LD->getPointerOperand() == Alloca)
- return LD;
- return nullptr;
- }();
- if (!LDUse)
- return V;
+static SetVector<Value *> storedValues(Value *Val) {
+ SetVector<Value *> Vals;
+ if (!isa<LoadInst>(Val))
+ return Vals;
+ auto LD = cast<LoadInst>(Val);
+
+ DenseSet<Instruction *> Visited;
+ SmallVector<Value*> Addrs;
+
+ Addrs.push_back(LD->getPointerOperand());
+
+ while (!Addrs.empty()) {
+ auto Addr = Addrs.pop_back_val();
+ auto AddrI = dyn_cast<Instruction>(Addr);
+ if (!AddrI) continue ;
+ if (Visited.contains(AddrI ))
+ continue;
+ Visited.insert(AddrI);
+
+
+ for (auto &&U : AddrI->uses()) {
+ if (auto S = dyn_cast<StoreInst>(U.getUser())) {
+ assert(S->getPointerOperand() == AddrI);
+ auto V = S->getValueOperand();
+ if (auto ML = dyn_cast<LoadInst>(V))
+ Addrs.push_back (ML->getPointerOperand() );
+ else
+ Vals.insert(V);
+ } else
+ if (auto L = dyn_cast<LoadInst>(U.getUser())) {
+ Addrs.push_back(L->getPointerOperand());
+ }
+ }
+ }
+
+ return Vals;
+}
+static Value * followStorePtr(Value *Val) {
+ Value *V = Val;
+ if (!isa<LoadInst>(Val))
+ return V;
+ auto LD = cast<LoadInst>(Val);
+ auto Alloca = dyn_cast<AllocaInst>( LD->getPointerOperand());
+ if (!Alloca)
+ return V;
+
auto STUse = [](Instruction *Addr) -> StoreInst *{
for (auto &&U : Addr->uses())
if (auto ST = dyn_cast<StoreInst>(U.getUser()))
@@ -236,8 +267,6 @@ static Value * followStorePtr(Value *Addr) {
return nullptr;
}(Alloca);
return STUse;
-
- // }
}
static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
@@ -4060,8 +4089,8 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
AtomicCmpXchgInst *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_EQ(followStorePtr(CmpExchg->getCompareOperand()), ExpectedVal);
- EXPECT_EQ(followStorePtr(CmpExchg->getNewValOperand()), Sub);
+ EXPECT_TRUE(storedValues(CmpExchg->getCompareOperand()).contains( ExpectedVal));
+ EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains( Sub));
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
@@ -4084,7 +4113,8 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
Type *FloatTy = Type::getFloatTy(M->getContext());
AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
XVal->setName("AtomicVar");
- Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
+ auto ExpectedVal = ConstantFP::get(Type::getFloatTy(Ctx), 0.0);
+ Builder.CreateStore(ExpectedVal, XVal);
OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
@@ -4113,24 +4143,24 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- EXPECT_NE(Phi, nullptr);
- EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
+ //PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
+ //EXPECT_NE(Phi, nullptr);
+ //EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
+ //EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
+ //EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
- ExtractValueInst *ExVI1 =
- dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
+ ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
EXPECT_NE(ExVI1, nullptr);
AtomicCmpXchgInst *CmpExchg =
dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
+ EXPECT_TRUE(storedValues(CmpExchg->getCompareOperand()).contains( ExpectedVal));
+ EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains( Sub));
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
@@ -4152,7 +4182,8 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
Type *IntTy = Type::getInt32Ty(M->getContext());
AllocaInst *XVal = Builder.CreateAlloca(IntTy);
XVal->setName("AtomicVar");
- Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
+ auto ExpectedVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Builder.CreateStore(ExpectedVal, XVal);
OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
@@ -4181,24 +4212,23 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- EXPECT_NE(Phi, nullptr);
- EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
+ //PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
+ //EXPECT_NE(Phi, nullptr);
+ //EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
+ //EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
+ //EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
- ExtractValueInst *ExVI1 =
- dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
+ ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
EXPECT_NE(ExVI1, nullptr);
- AtomicCmpXchgInst *CmpExchg =
- dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
+ AtomicCmpXchgInst *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
+ EXPECT_TRUE(storedValues(CmpExchg->getCompareOperand()).contains( ExpectedVal));
+ EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains( Sub));
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
>From 4d8474726c83e4f19973cdc7a071d15ae905edef Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Wed, 13 Nov 2024 17:22:56 +0100
Subject: [PATCH 09/17] Fix test
---
.../OpenMP/atomic-capture-complex.f90 | 67 ++++++++++---------
1 file changed, 35 insertions(+), 32 deletions(-)
diff --git a/flang/test/Integration/OpenMP/atomic-capture-complex.f90 b/flang/test/Integration/OpenMP/atomic-capture-complex.f90
index 4ffd18097d79ee..ce020d079bb593 100644
--- a/flang/test/Integration/OpenMP/atomic-capture-complex.f90
+++ b/flang/test/Integration/OpenMP/atomic-capture-complex.f90
@@ -6,39 +6,42 @@
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!
-!RUN: %if x86-registered-target %{ %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fopenmp %s -o - | FileCheck --check-prefixes=CHECK,X86 %s %}
-!RUN: %if aarch64-registerd-target %{ %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-llvm -fopenmp %s -o - | FileCheck --check-prefixes=CHECK,AARCH64 %s %}
+!RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fopenmp %s -o - | FileCheck %s
-!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
-!CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8
-!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
-!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4
-!CHECK: br label %entry
-
-!CHECK: entry:
-!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
-!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
-!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
-!CHECK: br label %.atomic.cont
-
-!CHECK: .atomic.cont
-!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
-!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
-!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
-!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
-!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
-!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
-!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
-!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
-!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]],
-!i32 2, i32 2)
-!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
-!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont
-
-!CHECK: .atomic.exit
-!AARCH64: %[[LCSSA:.*]] = phi { float, float } [ %[[VAL_10]], %.atomic.cont ]
-!AARCH64: store { float, float } %[[LCSSA]], ptr %[[VAL_1]], align 4
-!X86: store { float, float } %[[VAL_10]], ptr %[[VAL_1]], align 4
+! CHECK-LABEL: define {{.*}}@_QQmain(
+! CHECK-NEXT: %[[DOTATOMIC_ORIG_PTR:.+]] = alloca { float, float }, align 8
+! CHECK-NEXT: %[[DOTATOMIC_UPD_PTR:.+]] = alloca { float, float }, align 8
+! CHECK-NEXT: %[[TMP1:.+]] = alloca { float, float }, i64 1, align 8
+! CHECK-NEXT: %[[TMP2:.+]] = alloca { float, float }, i64 1, align 8
+! CHECK-NEXT: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[TMP2]], align 4
+! CHECK-NEXT: br label %[[ENTRY:.+]]
+! CHECK-EMPTY:
+! CHECK-NEXT: [[ENTRY]]:
+! CHECK-NEXT: %[[DOTATOMIC_LOAD:.+]] = load atomic i64, ptr %[[TMP2]] monotonic, align 8
+! CHECK-NEXT: store i64 %[[DOTATOMIC_LOAD]], ptr %[[DOTATOMIC_ORIG_PTR]], align 8
+! CHECK-NEXT: br label %[[DOTATOMIC_RETRY:.+]]
+! CHECK-EMPTY:
+! CHECK-NEXT: [[DOTATOMIC_RETRY]]:
+! CHECK-NEXT: %[[DOTATOMIC_ORIG:.+]] = load { float, float }, ptr %[[DOTATOMIC_ORIG_PTR]], align 4
+! CHECK-NEXT: %[[TMP3:.+]] = extractvalue { float, float } %[[DOTATOMIC_ORIG]], 0
+! CHECK-NEXT: %[[TMP4:.+]] = extractvalue { float, float } %[[DOTATOMIC_ORIG]], 1
+! CHECK-NEXT: %[[TMP5:.+]] = fadd contract float %[[TMP3]], 1.000000e+00
+! CHECK-NEXT: %[[TMP6:.+]] = fadd contract float %[[TMP4]], 1.000000e+00
+! CHECK-NEXT: %[[TMP7:.+]] = insertvalue { float, float } undef, float %[[TMP5]], 0
+! CHECK-NEXT: %[[TMP8:.+]] = insertvalue { float, float } %[[TMP7]], float %[[TMP6]], 1
+! CHECK-NEXT: store { float, float } %[[TMP8]], ptr %[[DOTATOMIC_UPD_PTR]], align 4
+! CHECK-NEXT: %[[DOTCMPXCHG_EXPECTED:.+]] = load i64, ptr %[[DOTATOMIC_ORIG_PTR]], align 8
+! CHECK-NEXT: %[[DOTCMPXCHG_DESIRED:.+]] = load i64, ptr %[[DOTATOMIC_UPD_PTR]], align 8
+! CHECK-NEXT: %[[DOTCMPXCHG_PAIR:.+]] = cmpxchg weak ptr %[[TMP2]], i64 %[[DOTCMPXCHG_EXPECTED]], i64 %[[DOTCMPXCHG_DESIRED]] monotonic monotonic, align 8
+! CHECK-NEXT: %[[DOTCMPXCHG_PREV:.+]] = extractvalue { i64, i1 } %[[DOTCMPXCHG_PAIR]], 0
+! CHECK-NEXT: store i64 %[[DOTCMPXCHG_PREV]], ptr %[[DOTATOMIC_ORIG_PTR]], align 8
+! CHECK-NEXT: %[[DOTCMPXCHG_SUCCESS:.+]] = extractvalue { i64, i1 } %[[DOTCMPXCHG_PAIR]], 1
+! CHECK-NEXT: br i1 %[[DOTCMPXCHG_SUCCESS]], label %[[DOTATOMIC_DONE:.+]], label %[[DOTATOMIC_RETRY]]
+! CHECK-EMPTY:
+! CHECK-NEXT: [[DOTATOMIC_DONE]]:
+! CHECK-NEXT: store { float, float } %[[TMP8]], ptr %[[TMP1]], align 4
+! CHECK-NEXT: ret void
+! CHECK-NEXT: }
program main
complex*8 ia, ib
>From c84b6971d57eb5d343563f4a0fe8189ef550b03a Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 05:35:49 +0100
Subject: [PATCH 10/17] Cleanup
---
.../OpenMP/atomic-capture-complex.f90 | 3 +-
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 3 +-
.../llvm/Transforms/Utils/BuildBuiltins.h | 61 ++++++++++++++++---
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 22 ++++++-
4 files changed, 77 insertions(+), 12 deletions(-)
diff --git a/flang/test/Integration/OpenMP/atomic-capture-complex.f90 b/flang/test/Integration/OpenMP/atomic-capture-complex.f90
index ce020d079bb593..39d39b345523fc 100644
--- a/flang/test/Integration/OpenMP/atomic-capture-complex.f90
+++ b/flang/test/Integration/OpenMP/atomic-capture-complex.f90
@@ -6,7 +6,8 @@
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!
-!RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fopenmp %s -o - | FileCheck %s
+!TODO: Disable passes that run even with -O0 (Clang: -disable-llvm-passes)
+!RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fopenmp -mllvm --disable-llvm %s -o - | FileCheck %s
! CHECK-LABEL: define {{.*}}@_QQmain(
! CHECK-NEXT: %[[DOTATOMIC_ORIG_PTR:.+]] = alloca { float, float }, align 8
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index c9c8753f84b313..2be755ccfc9a4e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3084,8 +3084,7 @@ class OpenMPIRBuilder {
/// (e.g. true for X = X BinOp Expr)
///
/// \returns A pair of the old value of X before the update, and the value
- /// used for the update.
- /// FIXME: "Value used for the update"? Should be "the updated value"?
+ /// after the update.
Expected<std::pair<Value *, Value *>>
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index 95977752ac8976..d91881026c8e4c 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -6,7 +6,48 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements some functions for lowering compiler builtins.
+// This file implements some functions for lowering compiler builtins,
+// specifically for atomics. Currently, LLVM-IR has no representation of atomics
+// that can be used independent of its arguments:
+//
+// * The instructions load atomic, store atomic, atomicrmw, and cmpxchg can only
+// be used with constant memory model, sync scope, data sizes (that must be
+// power-of-2), volatile and weak property, and should not be used with data
+// types that are atypically large, which may slow down the compiler.
+//
+// * libcall (in GCC's case: libatomic; LLVM: Compiler-RT) functions work with
+// any data size, but are slower. Specialized functions for a selected number
+// of data sizes exist as well. They do not support sync scopes, the volatile
+// or weakness property. These functions may be implemented using a lock and
+// availability depends on the target triple (e.g. GPU devices cannot
+// implement a global lock by design).
+//
+// We want to mimic Clang's behaviour:
+//
+// * Prefer atomic instructions over libcall functions whenever possible. When a
+// target backend does not support atomic instructions natively,
+// AtomicExpandPass, LowerAtomicPass, or some backend-specific lowering pass will
+// convert such instructions to a libcall function call. The reverse is not
+// the case, i.e. once a libcall function is emitted, there is no pass that
+// optimizes it into an instruction.
+//
+// * When passed a non-constant enum argument which the instruction requires to
+// be constant, then emit a switch case for each enum case.
+//
+// Clang currently does not check whether the target actually supports the
+// atomic libcall functions, so it will always fall back to a libcall function
+// even if the target does not support it. That is, emitting an atomic builtin
+// may fail and a frontend needs to handle this case.
+//
+// Clang also assumes that the maximum supported data size of an atomic
+// instruction is 16, even though this is target-dependent and should be
+// queried using TargetLowering::getMaxAtomicSizeInBitsSupported(). However,
+// TargetMachine (which is a factory for TargetLowering) is not available
+// during Clang's CodeGen phase; it is only created for the LLVM pass pipeline.
+//
+// The functions in this file are intended to handle the complexity of builtins
+// so frontends do not need to care about the details. In the future LLVM may
+// introduce more generic atomic constructs that are lowered by an LLVM pass.
//
//===----------------------------------------------------------------------===//
@@ -33,10 +74,12 @@ namespace SyncScope {
typedef uint8_t ID;
}
+/// Emit the __atomic_load builtin. This may either be lowered to the load LLVM
+/// instruction, or to one of the following libcall functions: __atomic_load_1,
+/// __atomic_load_2, __atomic_load_4, __atomic_load_8, __atomic_load_16,
+/// __atomic_load.
Error emitAtomicLoadBuiltin(
- Value *Ptr, Value *RetPtr,
- // std::variant<Value *, bool> IsWeak,
- bool IsVolatile,
+ Value *Ptr, Value *RetPtr, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
@@ -47,10 +90,12 @@ Error emitAtomicLoadBuiltin(
bool AllowInstruction = true, bool AllowSwitch = true,
bool AllowSizedLibcall = true, bool AllowLibcall = true);
+/// Emit the __atomic_store builtin. It may either be lowered to the store LLVM
+/// instruction, or to one of the following libcall functions: __atomic_store_1,
+/// __atomic_store_2, __atomic_store_4, __atomic_store_8, __atomic_store_16,
+/// __atomic_store.
Error emitAtomicStoreBuiltin(
- Value *Ptr, Value *ValPtr,
- // std::variant<Value *, bool> IsWeak,
- bool IsVolatile,
+ Value *Ptr, Value *ValPtr, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
@@ -61,7 +106,7 @@ Error emitAtomicStoreBuiltin(
bool AllowInstruction = true, bool AllowSwitch = true,
bool AllowSizedLibcall = true, bool AllowLibcall = true);
-/// Emit a call to the __atomic_compare_exchange builtin. This may either be
+/// Emit the __atomic_compare_exchange builtin. This may either be
/// lowered to the cmpxchg LLVM instruction, or to one of the following libcall
/// functions: __atomic_compare_exchange_1, __atomic_compare_exchange_2,
/// __atomic_compare_exchange_4, __atomic_compare_exchange_8,
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 64c395e85b3a86..403b5c7b29569c 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1306,6 +1306,12 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
case LibFunc_atomic_load_4:
case LibFunc_atomic_load_8:
case LibFunc_atomic_load_16:
+ case LibFunc_atomic_store:
+ case LibFunc_atomic_store_1:
+ case LibFunc_atomic_store_2:
+ case LibFunc_atomic_store_4:
+ case LibFunc_atomic_store_8:
+ case LibFunc_atomic_store_16:
case LibFunc_atomic_compare_exchange:
case LibFunc_atomic_compare_exchange_1:
case LibFunc_atomic_compare_exchange_2:
@@ -1313,8 +1319,8 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
case LibFunc_atomic_compare_exchange_8:
case LibFunc_atomic_compare_exchange_16:
Changed |= setArgsNoUndef(F);
- Changed |= setWillReturn(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
break;
default:
// FIXME: It'd be really nice to cover all the library functions we're
@@ -1422,6 +1428,20 @@ FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
case LibFunc_atomic_load_4:
case LibFunc_atomic_load_8:
case LibFunc_atomic_load_16:
+ setRetExtAttr(*F, TLI); // return
+ setArgExtAttr(*F, 3, TLI); // Memorder
+ break;
+
+ case LibFunc_atomic_store:
+ setArgExtAttr(*F, 4, TLI); // Memorder
+ break;
+
+ case LibFunc_atomic_store_1:
+ case LibFunc_atomic_store_2:
+ case LibFunc_atomic_store_4:
+ case LibFunc_atomic_store_8:
+ case LibFunc_atomic_store_16:
+ setArgExtAttr(*F, 2, TLI); // Val
setArgExtAttr(*F, 3, TLI); // Memorder
break;
>From f304c42d88533728042c336ff3e8b62f2567e44e Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 15:45:25 +0100
Subject: [PATCH 11/17] Try createAtomicCompare
---
clang/lib/CodeGen/CGStmtOpenMP.cpp | 15 +-
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 6 +-
.../llvm/Transforms/Utils/BuildBuiltins.h | 11 +-
llvm/lib/Analysis/TargetLibraryInfo.cpp | 6 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 173 ++++++++++++------
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 54 +++---
6 files changed, 172 insertions(+), 93 deletions(-)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..fd0eb7c42a31ab 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6667,16 +6667,19 @@ static void emitOMPAtomicCompareExpr(
R->getType().isVolatileQualified()};
}
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ CGF. AllocaInsertPt->getParent(),CGF. AllocaInsertPt->getIterator());
+
if (FailAO == llvm::AtomicOrdering::NotAtomic) {
// fail clause was not mentioned on the
// "#pragma omp atomic compare" construct.
- CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
- CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
- IsPostfixUpdate, IsFailOnly));
+ CGF.Builder.restoreIP( cantFail( OMPBuilder.createAtomicCompare(
+ CGF.Builder, AllocaIP, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
+ IsPostfixUpdate, IsFailOnly)));
} else
- CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
- CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
- IsPostfixUpdate, IsFailOnly, FailAO));
+ CGF.Builder.restoreIP(cantFail(OMPBuilder.createAtomicCompare(
+ CGF.Builder,AllocaIP, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
+ IsPostfixUpdate, IsFailOnly, FailAO)));
}
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2be755ccfc9a4e..4ecd297f71b63f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3250,12 +3250,12 @@ class OpenMPIRBuilder {
/// the case the comparison is '=='.
///
/// \return Insertion point after generated atomic capture IR.
- InsertPointTy
- createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
+ InsertPointOrErrorTy
+ createAtomicCompare(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
- InsertPointTy createAtomicCompare(const LocationDescription &Loc,
+ InsertPointOrErrorTy createAtomicCompare(const LocationDescription &Loc, InsertPointTy AllocaIP,
AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D,
AtomicOrdering AO,
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index d91881026c8e4c..303cc767f6bf27 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -143,13 +143,12 @@ Error emitAtomicStoreBuiltin(
/// supports integer and pointers only. If any other type or
/// omitted, type-prunes to an integer the holds at least \p
/// DataSize bytes.
-/// @param PrevPtr (optional) Receives the value at \p Ptr before the atomic
+/// @param ActualPtr (optional) Receives the value at \p Ptr before the atomic
/// exchange is attempted. This means:
-/// In case of success: The value at \p Ptr before the
-/// update. That is, the value passed behind \p ExpectedPtr.
-/// In case of failure: The current value at \p Ptr, i.e. the
-/// atomic exchange is effectively only performace an atomic
-/// load of that value.
+/// In case of success:
+/// The value at \p Ptr before the update. That is, the value passed behind \p ExpectedPtr.
+/// In case of failure (including spurious failures if IsWeak):
+/// The current value at \p Ptr, i.e. the operation effectively was an atomic load of that value using FailureMemorder semantics.
/// @param DataSize Number of bytes to be exchanged.
/// @param AvailableSize The total size that can be used for the atomic
/// operation. It may include trailing padding in addition to
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 27a7ff9ef55352..c629e582c15c53 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -68,10 +68,10 @@ enum FuncArgTypeID : char {
Long, // Either 32 or 64 bits.
IntX, // Any integer type.
Int64,
- LLong, // 64 bits on all targets.
- SizeT, // size_t.
- SSizeT, // POSIX ssize_t.
Int128,
+ LLong, // 64 bits on all targets.
+ SizeT, // size_t.
+ SSizeT, // POSIX ssize_t.
Flt, // IEEE float.
Dbl, // IEEE double.
LDbl, // Any floating type (TODO: tighten this up).
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3eb600b6947a08..094eeadc64f552 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8038,14 +8038,16 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
assert(X.Var->getType()->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
assert(V.Var->getType()->isPointerTy() &&
- "OMP Atomic expects a pointer for atomic load result");
+ "Expecting a pointer for atomic load result");
Type *XElemTy = X.ElemTy;
+ // TODO: Get TLI and TL from frontend
Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
TargetLibraryInfoImpl TLII(T);
TargetLibraryInfo TLI(TLII);
+TargetLowering *TL = nullptr;
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Twine Name(X.Var->getName());
+ Twine Name=X.Var->getName();
Error ALResult =
emitAtomicLoadBuiltin(X.Var,
@@ -8060,7 +8062,7 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
/*Builder=*/Builder,
/*DL=*/DL,
/*TLI=*/&TLI,
- /*TL=*/nullptr,
+ /*TL=*/TL,
/*SyncScopes=*/{},
/*FallbackScope=*/StringRef(),
/*Name=*/Name + ".atomic.read");
@@ -8069,7 +8071,6 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
- // LoadInst *LoadedVal= Builder.CreateLoad(XElemTy, X.Var, Name );
return Builder.saveIP();
}
@@ -8080,23 +8081,23 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
if (!updateToLocation(Loc))
return Loc.IP;
+ assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
assert(X.Var->getType()->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
Type *XElemTy = X.ElemTy;
- assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
- XElemTy->isPointerTy()) &&
- "OMP atomic write expected a scalar type");
+ // TODO: Get TLI and TL from frontend
Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
TargetLibraryInfoImpl TLII(T);
TargetLibraryInfo TLI(TLII);
+TargetLowering *TL = nullptr;
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Twine Name(X.Var->getName());
+ Twine Name=X.Var->getName();
// Reserve some stack space.
- auto ContIP = Builder.saveIP();
+ InsertPointTy ContIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
- auto ValPtr = Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.val");
+ Value * ValPtr = Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.val");
Builder.restoreIP(ContIP);
Builder.CreateStore(Expr, ValPtr);
@@ -8112,7 +8113,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
/*Builder=*/Builder,
/*DL=*/DL,
/*TLI=*/&TLI,
- /*TL=*/nullptr,
+ /*TL=*/TL,
/*SyncScopes=*/{},
/*FallbackScope=*/StringRef(),
/*Name=*/Name + ".atomic.write");
@@ -8136,9 +8137,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
Type *XElemTy = X.ElemTy;
- assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
- XElemTy->isPointerTy()) &&
- "OMP atomic update expected a scalar type");
assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
(RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
"OpenMP atomic does not support LT or GT operations");
@@ -8225,19 +8223,22 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
return Res;
}
+ // TODO: Get TLI and TL from frontend
Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
TargetLibraryInfoImpl TLII(T);
TargetLibraryInfo TLI(TLII);
+TargetLowering *TL = nullptr;
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Twine Name(X->getName());
+
// Reserve some stack space.
InsertPointTy InitIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
AllocaInst *OrigPtr =
- Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.orig.ptr");
+ Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.expected.ptr");
AllocaInst *UpdPtr =
- Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.upd.ptr");
+ Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.desired.ptr");
Builder.restoreIP(InitIP);
// Old value for first transaction. Every followup-transaction will use the
@@ -8254,7 +8255,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
/*Builder=*/Builder,
/*DL=*/DL,
/*TLI=*/&TLI,
- /*TL=*/nullptr,
+ /*TL=*/TL,
/*SyncScopes=*/{},
/*FallbackScope=*/StringRef(),
/*Name=*/Name);
@@ -8262,15 +8263,13 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
return std::move(ALResult);
// Create new CFG.
- BasicBlock *DoneBB = splitBB(Builder, false, X->getName() + ".atomic.done");
- BasicBlock *RetryBB = splitBB(Builder, true, X->getName() + ".atomic.retry");
+ BasicBlock *DoneBB = splitBBWithSuffix(Builder, false, ".atomic.done");
+ BasicBlock *RetryBB = splitBBWithSuffix(Builder, true, ".atomic.retry");
// Emit the update transaction...
Builder.SetInsertPoint(RetryBB);
// 1. Let the user code compute the new value.
- // FIXME: This should not be done by-value, as the type might be unreasonable
- // large (e.g. i4096) and LLVM does not scale well with such large types.
Value *OrigVal = Builder.CreateLoad(XElemTy, OrigPtr, Name + ".atomic.orig");
Expected<Value *> CBResult = UpdateOp(OrigVal, Builder);
if (!CBResult)
@@ -8287,7 +8286,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
/*IsVolatile=*/false,
/*SuccessMemorder=*/AO,
/*FailureMemorder=*/{},
- /*PrevPtr=*/OrigPtr,
+ /*ActualPtr=*/OrigPtr,
/*DataTy=*/XElemTy,
/*DataSize=*/{},
/*AvailableSize=*/{},
@@ -8304,7 +8303,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
// 3. Repeat transaction until successful.
Builder.CreateCondBr(Success, DoneBB, RetryBB);
- // Continue when the update transaction was successful.
+ // Continue with user code when the update transaction was successful.
Builder.SetInsertPoint(DoneBB);
return std::make_pair(OrigVal, UpdVal);
@@ -8323,9 +8322,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
Type *XElemTy = X.ElemTy;
- assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
- XElemTy->isPointerTy()) &&
- "OMP atomic capture expected a scalar type");
assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
"OpenMP atomic does not support LT or GT operations");
});
@@ -8346,23 +8342,23 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
return Builder.saveIP();
}
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
- const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCompare(
+ const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
bool IsFailOnly) {
AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
- return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr,
+ return createAtomicCompare(Loc, AllocaIP, X, V, R, E, D, AO, Op, IsXBinopExpr,
IsPostfixUpdate, IsFailOnly, Failure);
}
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
- const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCompare(
+ const LocationDescription &Loc,InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
bool IsFailOnly, AtomicOrdering Failure) {
-
+ assert(!isConflictIP(Loc.IP, AllocaIP)&& "IPs must not conflict");
if (!updateToLocation(Loc))
return Loc.IP;
@@ -8377,25 +8373,102 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
bool IsInteger = E->getType()->isIntegerTy();
if (Op == OMPAtomicCompareOp::EQ) {
+ // TODO: Get TLI and TL from frontend
+ Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
+ TargetLibraryInfoImpl TLII(T);
+ TargetLibraryInfo TLI(TLII);
+TargetLowering *TL = nullptr;
+ const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+ Twine Name(X.Var->getName());
+ Type *ElemTy = X.Var->getType();
+
+
+ // Reserve some stack space.
+ InsertPointTy InitIP = Builder.saveIP();
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *OrigPtr = Builder.CreateAlloca(ElemTy, nullptr, Name + ".atomic.expected.addr");
+ AllocaInst *UpdPtr = Builder.CreateAlloca(ElemTy, nullptr, Name + ".atomic.desired.addr");
+ Builder.restoreIP(InitIP);
+
+ Builder.CreateStore(E, OrigPtr );
+ Builder.CreateStore(D, UpdPtr );
+
+ // 2. AtomicCompareExchange to replace OrigVal with UpdVal.
+ Expected<Value *> ACEResult = emitAtomicCompareExchangeBuiltin(
+ /*Ptr=*/X.Var,
+ /*ExpectedPtr=*/OrigPtr,
+ /*DesiredPtr=*/UpdPtr,
+ /*IsWeak=*/false,
+ /*IsVolatile=*/X.IsVolatile,
+ /*SuccessMemorder=*/AO,
+ /*FailureMemorder=*/{},
+ /*ActualPtr=*/OrigPtr,
+ /*DataTy=*/ElemTy,
+ /*DataSize=*/{},
+ /*AvailableSize=*/{},
+ /*Align=*/{},
+ /*Builder=*/Builder,
+ /*DL=*/DL,
+ /*TLI=*/&TLI,
+ /*TL=*/nullptr,
+ /*Name=*/Name);
+ if (!ACEResult)
+ return ACEResult.takeError();
+ Value *SuccessOrFail = *ACEResult;
+
+ if (V.Var) {
+ Value *OldValue = Builder.CreateLoad(ElemTy, OrigPtr, Name + ".atomic.actual");
+ if (IsPostfixUpdate) {
+ Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
+ } else {
+ if (IsFailOnly) {
+ // CurBB----
+ // | |
+ // v |
+ // ContBB |
+ // | |
+ // v |
+ // ExitBB <-
+ //
+ // where ContBB only contains the store of old value to 'v'.
+
+ BasicBlock *ExitBB = splitBBWithSuffix(Builder, false, ".atomic.exit");
+ BasicBlock *ContBB = splitBBWithSuffix(Builder, true, ".atomic.cont");
+
+ Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
+
+
+ Builder.SetInsertPoint(ContBB);
+ Builder.CreateStore(OldValue, V.Var);
+ Builder.CreateBr(ExitBB);
+
+
+ Builder.SetInsertPoint(ExitBB);
+ } else {
+ Value *CapturedValue = Builder.CreateSelect(SuccessOrFail, E, OldValue);
+ Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
+ }
+ }
+ } else {
+ }
+
+
+ #if 0
AtomicCmpXchgInst *Result = nullptr;
if (!IsInteger) {
- IntegerType *IntCastTy =
- IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
+ IntegerType *IntCastTy = IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
- Result = Builder.CreateAtomicCmpXchg(X.Var, EBCast, DBCast, MaybeAlign(),
- AO, Failure);
+ Result = Builder.CreateAtomicCmpXchg(X.Var, EBCast, DBCast, MaybeAlign(), AO, Failure);
} else {
- Result =
- Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
+ Result = Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
}
if (V.Var) {
Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0);
if (!IsInteger)
OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
- assert(OldValue->getType() == V.ElemTy &&
- "OldValue and V must be of same type");
+ assert(OldValue->getType() == V.ElemTy && "OldValue and V must be of same type");
if (IsPostfixUpdate) {
Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
} else {
@@ -8413,10 +8486,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
BasicBlock *CurBB = Builder.GetInsertBlock();
Instruction *CurBBTI = CurBB->getTerminator();
CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
- BasicBlock *ExitBB = CurBB->splitBasicBlock(
- CurBBTI, X.Var->getName() + ".atomic.exit");
- BasicBlock *ContBB = CurBB->splitBasicBlock(
- CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
+ BasicBlock *ExitBB = CurBB->splitBasicBlock(CurBBTI, X.Var->getName() + ".atomic.exit");
+ BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
ContBB->getTerminator()->eraseFromParent();
CurBB->getTerminator()->eraseFromParent();
@@ -8426,30 +8497,28 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
Builder.CreateStore(OldValue, V.Var);
Builder.CreateBr(ExitBB);
- if (UnreachableInst *ExitTI =
- dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
+ if (UnreachableInst *ExitTI = dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
CurBBTI->eraseFromParent();
Builder.SetInsertPoint(ExitBB);
} else {
Builder.SetInsertPoint(ExitTI);
}
} else {
- Value *CapturedValue =
- Builder.CreateSelect(SuccessOrFail, E, OldValue);
+ Value *CapturedValue = Builder.CreateSelect(SuccessOrFail, E, OldValue);
Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
}
}
}
+ #endif
+
// The comparison result has to be stored.
if (R.Var) {
assert(R.Var->getType()->isPointerTy() &&
"r.var must be of pointer type");
- assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
- Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
Value *ResultCast = R.IsSigned
- ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
- : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
+ ? Builder.CreateSExt(SuccessOrFail, R.ElemTy)
+ : Builder.CreateZExt(SuccessOrFail, R.ElemTy);
Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
}
} else {
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index ce4ba8044b929f..d9860b623bdd14 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -79,7 +79,7 @@ class AtomicEmitter {
std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
FailureMemorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope,
- // Value *PrevPtr,
+ // Value *ActualPtr,
Type *DataTy, std::optional<uint64_t> DataSize,
std::optional<uint64_t> AvailableSize, MaybeAlign Align,
IRBuilderBase &Builder, const DataLayout &DL,
@@ -377,10 +377,8 @@ class AtomicEmitter {
virtual Expected<Value *> makeFallbackError() = 0;
- Expected<Value *> emit() {
+ Expected<Value *> emit(bool CoerceType = false) {
assert(Ptr->getType()->isPointerTy());
- // assert(ExpectedPtr->getType()->isPointerTy());
- // assert(DesiredPtr->getType()->isPointerTy());
assert(TLI);
unsigned MaxAtomicSizeSupported = 16;
@@ -431,14 +429,20 @@ class AtomicEmitter {
EffectiveAlign = Ptr->getPointerAlignment(DL);
}
- // Only use the original data type if it is compatible with cmpxchg (and sized
+ // Only use the original data type if it is compatible with the atomic instruction (and sized
// libcall function) and matches the preferred size. No type punning needed
- // for __atomic_compare_exchange which only takes pointers.
- if (DataTy && DataSizeConst == PreferredSize &&
- (DataTy->isIntegerTy() || DataTy->isPointerTy()))
- CoercedTy = DataTy;
- else if (PreferredSize <= 16)
- CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+ // when the libcall function only takes pointers.
+ CoercedTy = DataTy;
+ // If we have rounded-up the data size, unconditionally coerce to a different type.
+ if (DataSizeConst != PreferredSize)
+ CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+ if (CoerceType) {
+ if (DataTy && DataSizeConst == PreferredSize &&
+ (DataTy->isIntegerTy() || DataTy->isPointerTy()))
+ CoercedTy = DataTy;
+ else if (PreferredSize <= 16)
+ CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+}
// For resolving the SuccessMemorder/FailureMemorder arguments. If it is
// constant, determine the AtomicOrdering for use with the cmpxchg
@@ -450,7 +454,7 @@ class AtomicEmitter {
// Derive FailureMemorder from SucccessMemorder
if (SuccessMemorderConst) {
AtomicOrdering MOFailure =
- llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(
+ AtomicCmpXchgInst::getStrongestFailureOrdering(
*SuccessMemorderConst);
MemorderVariant = MOFailure;
}
@@ -749,21 +753,25 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
using AtomicEmitter::AtomicEmitter;
Expected<Value *> emitCmpXchg(Value *ExpectedPtr, Value *DesiredPtr,
- Value *PrevPtr) {
+ Value *ActualPtr) {
assert(ExpectedPtr->getType()->isPointerTy());
assert(DesiredPtr->getType()->isPointerTy());
- assert(!PrevPtr || PrevPtr->getType()->isPointerTy());
+ assert(!ActualPtr || ActualPtr->getType()->isPointerTy());
+ assert(Ptr != ExpectedPtr);
+ assert(Ptr != DesiredPtr);
+ assert(Ptr != ActualPtr);
+ assert(ActualPtr != DesiredPtr);
this->ExpectedPtr = ExpectedPtr;
this->DesiredPtr = DesiredPtr;
- this->PrevPtr = PrevPtr;
- return emit();
+ this->ActualPtr = ActualPtr;
+ return emit(/*CoerceType*/true);
}
protected:
Value *ExpectedPtr;
Value *DesiredPtr;
- Value *PrevPtr;
+ Value *ActualPtr;
Value *ExpectedVal;
Value *DesiredVal;
@@ -785,10 +793,10 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
AtomicInst->setWeak(IsWeak);
AtomicInst->setVolatile(IsVolatile);
- if (PrevPtr) {
+ if (ActualPtr) {
Value *PreviousVal = Builder.CreateExtractValue(AtomicInst, /*Idxs=*/0,
Name + ".cmpxchg.prev");
- Builder.CreateStore(PreviousVal, PrevPtr);
+ Builder.CreateStore(PreviousVal, ActualPtr);
}
Value *SuccessFailureVal = Builder.CreateExtractValue(
AtomicInst, /*Idxs=*/1, Name + ".cmpxchg.success");
@@ -809,8 +817,8 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
Builder.getInt8(0), Name + ".cmpxchg.success");
- if (PrevPtr && PrevPtr != ExpectedPtr)
- Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
+ if (ActualPtr && ActualPtr != ExpectedPtr)
+ Builder.CreateMemCpy(ActualPtr, {}, ExpectedPtr, {}, DataSizeConst);
return SuccessBool;
}
@@ -847,8 +855,8 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
Builder.CreateCmp(CmpInst::Predicate::ICMP_EQ, SuccessResult,
Builder.getInt8(0), Name + ".cmpxchg.success");
- if (PrevPtr && PrevPtr != ExpectedPtr)
- Builder.CreateMemCpy(PrevPtr, {}, ExpectedPtr, {}, DataSizeConst);
+ if (ActualPtr && ActualPtr != ExpectedPtr)
+ Builder.CreateMemCpy(ActualPtr, {}, ExpectedPtr, {}, DataSizeConst);
return SuccessBool;
}
>From 9f31dacb24309caba79303e4b1b6714ac55cce5e Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 19:06:22 +0100
Subject: [PATCH 12/17] WIP
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 6 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 122 ++++--------------
2 files changed, 28 insertions(+), 100 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 4ecd297f71b63f..2be755ccfc9a4e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3250,12 +3250,12 @@ class OpenMPIRBuilder {
/// the case the comparison is '=='.
///
/// \return Insertion point after generated atomic capture IR.
- InsertPointOrErrorTy
- createAtomicCompare(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
+ InsertPointTy
+ createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
- InsertPointOrErrorTy createAtomicCompare(const LocationDescription &Loc, InsertPointTy AllocaIP,
+ InsertPointTy createAtomicCompare(const LocationDescription &Loc,
AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D,
AtomicOrdering AO,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 094eeadc64f552..56d38d22113118 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8349,16 +8349,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCompare(
bool IsFailOnly) {
AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
- return createAtomicCompare(Loc, AllocaIP, X, V, R, E, D, AO, Op, IsXBinopExpr,
+ return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr,
IsPostfixUpdate, IsFailOnly, Failure);
}
-OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCompare(
- const LocationDescription &Loc,InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V,
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
+ const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
bool IsFailOnly, AtomicOrdering Failure) {
- assert(!isConflictIP(Loc.IP, AllocaIP)&& "IPs must not conflict");
+
if (!updateToLocation(Loc))
return Loc.IP;
@@ -8373,102 +8373,26 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCompare(
bool IsInteger = E->getType()->isIntegerTy();
if (Op == OMPAtomicCompareOp::EQ) {
- // TODO: Get TLI and TL from frontend
- Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
- TargetLibraryInfoImpl TLII(T);
- TargetLibraryInfo TLI(TLII);
-TargetLowering *TL = nullptr;
- const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Twine Name(X.Var->getName());
- Type *ElemTy = X.Var->getType();
-
-
- // Reserve some stack space.
- InsertPointTy InitIP = Builder.saveIP();
- Builder.restoreIP(AllocaIP);
- AllocaInst *OrigPtr = Builder.CreateAlloca(ElemTy, nullptr, Name + ".atomic.expected.addr");
- AllocaInst *UpdPtr = Builder.CreateAlloca(ElemTy, nullptr, Name + ".atomic.desired.addr");
- Builder.restoreIP(InitIP);
-
- Builder.CreateStore(E, OrigPtr );
- Builder.CreateStore(D, UpdPtr );
-
- // 2. AtomicCompareExchange to replace OrigVal with UpdVal.
- Expected<Value *> ACEResult = emitAtomicCompareExchangeBuiltin(
- /*Ptr=*/X.Var,
- /*ExpectedPtr=*/OrigPtr,
- /*DesiredPtr=*/UpdPtr,
- /*IsWeak=*/false,
- /*IsVolatile=*/X.IsVolatile,
- /*SuccessMemorder=*/AO,
- /*FailureMemorder=*/{},
- /*ActualPtr=*/OrigPtr,
- /*DataTy=*/ElemTy,
- /*DataSize=*/{},
- /*AvailableSize=*/{},
- /*Align=*/{},
- /*Builder=*/Builder,
- /*DL=*/DL,
- /*TLI=*/&TLI,
- /*TL=*/nullptr,
- /*Name=*/Name);
- if (!ACEResult)
- return ACEResult.takeError();
- Value *SuccessOrFail = *ACEResult;
-
- if (V.Var) {
- Value *OldValue = Builder.CreateLoad(ElemTy, OrigPtr, Name + ".atomic.actual");
- if (IsPostfixUpdate) {
- Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
- } else {
- if (IsFailOnly) {
- // CurBB----
- // | |
- // v |
- // ContBB |
- // | |
- // v |
- // ExitBB <-
- //
- // where ContBB only contains the store of old value to 'v'.
-
- BasicBlock *ExitBB = splitBBWithSuffix(Builder, false, ".atomic.exit");
- BasicBlock *ContBB = splitBBWithSuffix(Builder, true, ".atomic.cont");
-
- Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
-
-
- Builder.SetInsertPoint(ContBB);
- Builder.CreateStore(OldValue, V.Var);
- Builder.CreateBr(ExitBB);
-
-
- Builder.SetInsertPoint(ExitBB);
- } else {
- Value *CapturedValue = Builder.CreateSelect(SuccessOrFail, E, OldValue);
- Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
- }
- }
- } else {
- }
-
-
- #if 0
AtomicCmpXchgInst *Result = nullptr;
+ // TODO: Use emitAtomicCompareExchangeBuiltin(...)
if (!IsInteger) {
- IntegerType *IntCastTy = IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
+ IntegerType *IntCastTy =
+ IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
- Result = Builder.CreateAtomicCmpXchg(X.Var, EBCast, DBCast, MaybeAlign(), AO, Failure);
+ Result = Builder.CreateAtomicCmpXchg(X.Var, EBCast, DBCast, MaybeAlign(),
+ AO, Failure);
} else {
- Result = Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
+ Result =
+ Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
}
if (V.Var) {
Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0);
if (!IsInteger)
OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
- assert(OldValue->getType() == V.ElemTy && "OldValue and V must be of same type");
+ assert(OldValue->getType() == V.ElemTy &&
+ "OldValue and V must be of same type");
if (IsPostfixUpdate) {
Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
} else {
@@ -8486,8 +8410,10 @@ TargetLowering *TL = nullptr;
BasicBlock *CurBB = Builder.GetInsertBlock();
Instruction *CurBBTI = CurBB->getTerminator();
CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
- BasicBlock *ExitBB = CurBB->splitBasicBlock(CurBBTI, X.Var->getName() + ".atomic.exit");
- BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
+ BasicBlock *ExitBB = CurBB->splitBasicBlock(
+ CurBBTI, X.Var->getName() + ".atomic.exit");
+ BasicBlock *ContBB = CurBB->splitBasicBlock(
+ CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
ContBB->getTerminator()->eraseFromParent();
CurBB->getTerminator()->eraseFromParent();
@@ -8497,28 +8423,30 @@ TargetLowering *TL = nullptr;
Builder.CreateStore(OldValue, V.Var);
Builder.CreateBr(ExitBB);
- if (UnreachableInst *ExitTI = dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
+ if (UnreachableInst *ExitTI =
+ dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
CurBBTI->eraseFromParent();
Builder.SetInsertPoint(ExitBB);
} else {
Builder.SetInsertPoint(ExitTI);
}
} else {
- Value *CapturedValue = Builder.CreateSelect(SuccessOrFail, E, OldValue);
+ Value *CapturedValue =
+ Builder.CreateSelect(SuccessOrFail, E, OldValue);
Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
}
}
}
- #endif
-
// The comparison result has to be stored.
if (R.Var) {
assert(R.Var->getType()->isPointerTy() &&
"r.var must be of pointer type");
+ assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
+ Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
Value *ResultCast = R.IsSigned
- ? Builder.CreateSExt(SuccessOrFail, R.ElemTy)
- : Builder.CreateZExt(SuccessOrFail, R.ElemTy);
+ ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
+ : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
}
} else {
>From cf3537b276603fd5be6f0e5c62191bfab441c494 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 19:06:32 +0100
Subject: [PATCH 13/17] WIP
---
clang/lib/CodeGen/CGStmtOpenMP.cpp | 15 ++--
.../llvm/Transforms/Utils/BuildBuiltins.h | 21 ++----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 33 ++++-----
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 70 +++++++------------
4 files changed, 51 insertions(+), 88 deletions(-)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index fd0eb7c42a31ab..390516fea38498 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6667,19 +6667,16 @@ static void emitOMPAtomicCompareExpr(
R->getType().isVolatileQualified()};
}
- llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
- CGF. AllocaInsertPt->getParent(),CGF. AllocaInsertPt->getIterator());
-
if (FailAO == llvm::AtomicOrdering::NotAtomic) {
// fail clause was not mentioned on the
// "#pragma omp atomic compare" construct.
- CGF.Builder.restoreIP( cantFail( OMPBuilder.createAtomicCompare(
- CGF.Builder, AllocaIP, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
- IsPostfixUpdate, IsFailOnly)));
+ CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
+ CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
+ IsPostfixUpdate, IsFailOnly));
} else
- CGF.Builder.restoreIP(cantFail(OMPBuilder.createAtomicCompare(
- CGF.Builder,AllocaIP, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
- IsPostfixUpdate, IsFailOnly, FailAO)));
+ CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
+ CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
+ IsPostfixUpdate, IsFailOnly, FailAO));
}
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index 303cc767f6bf27..e8291e963e8aa2 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -79,7 +79,7 @@ typedef uint8_t ID;
/// __atomic_load_2, __atomic_load_4, __atomic_load_8, __atomic_load_16,
/// __atomic_load.
Error emitAtomicLoadBuiltin(
- Value *Ptr, Value *RetPtr, bool IsVolatile,
+ Value *AtomicPtr, Value *RetPtr, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
@@ -95,7 +95,7 @@ Error emitAtomicLoadBuiltin(
/// __atomic_store_2, __atomic_store_4, __atomic_store_8, __atomic_store_16,
/// __atomic_store.
Error emitAtomicStoreBuiltin(
- Value *Ptr, Value *ValPtr, bool IsVolatile,
+ Value *AtomicPtr, Value *ValPtr, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
@@ -118,7 +118,7 @@ Error emitAtomicStoreBuiltin(
/// https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
/// https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
///
-/// @param Ptr The memory location accessed atomically.
+/// @param AtomicPtr The memory location accessed atomically.
/// @param ExpectedPtr Pointer to the data expected at \p Ptr. The exchange will
/// only happen if the value at \p Ptr is equal to this
/// (unless IsWeak is set). Data at \p ExpectedPtr may or may
@@ -179,7 +179,7 @@ Error emitAtomicStoreBuiltin(
/// @return A boolean value that indicates whether the exchange has happened
/// (true) or not (false).
Expected<Value *> emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ Value *AtomicPtr, Value *ExpectedPtr, Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
@@ -194,19 +194,6 @@ Expected<Value *> emitAtomicCompareExchangeBuiltin(
bool AllowInstruction = true, bool AllowSwitch = true,
bool AllowSizedLibcall = true, bool AllowLibcall = true);
-Expected<Value *> emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
- std::variant<Value *, bool> IsWeak, bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
- FailureMemorder,
- Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
- std::optional<uint64_t> AvailableSize, MaybeAlign Align,
- IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const TargetLowering *TL, llvm::Twine Name = Twine(),
- bool AllowInstruction = true, bool AllowSwitch = true,
- bool AllowSizedLibcall = true, bool AllowLibcall = true);
-
} // namespace llvm
#endif /* LLVM_TRANSFORMS_UTILS_BUILDBUILTINS_H */
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 56d38d22113118..f83426460f0fee 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8064,7 +8064,7 @@ TargetLowering *TL = nullptr;
/*TLI=*/&TLI,
/*TL=*/TL,
/*SyncScopes=*/{},
- /*FallbackScope=*/StringRef(),
+ /*FallbackScope=*/{},
/*Name=*/Name + ".atomic.read");
if (ALResult)
return std::move(ALResult);
@@ -8115,7 +8115,7 @@ TargetLowering *TL = nullptr;
/*TLI=*/&TLI,
/*TL=*/TL,
/*SyncScopes=*/{},
- /*FallbackScope=*/StringRef(),
+ /*FallbackScope=*/{},
/*Name=*/Name + ".atomic.write");
if (ASResult)
return ASResult;
@@ -8136,7 +8136,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
- Type *XElemTy = X.ElemTy;
assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
(RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
"OpenMP atomic does not support LT or GT operations");
@@ -8235,16 +8234,16 @@ TargetLowering *TL = nullptr;
// Reserve some stack space.
InsertPointTy InitIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
- AllocaInst *OrigPtr =
+ AllocaInst *ExpectedOrActualPtr =
Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.expected.ptr");
- AllocaInst *UpdPtr =
+ AllocaInst *DesiredPtr =
Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.desired.ptr");
Builder.restoreIP(InitIP);
// Old value for first transaction. Every followup-transaction will use the
// prev value from cmpxchg.
- Error ALResult = emitAtomicLoadBuiltin(X,
- /*RetPtr=*/OrigPtr,
+ Error ALResult = emitAtomicLoadBuiltin(/*AtomicPtr*/X,
+ /*RetPtr=*/ExpectedOrActualPtr,
/*IsVolatile=*/false,
/*Memorder=*/AO,
/*SyncScope=*/SyncScope::System,
@@ -8257,7 +8256,7 @@ TargetLowering *TL = nullptr;
/*TLI=*/&TLI,
/*TL=*/TL,
/*SyncScopes=*/{},
- /*FallbackScope=*/StringRef(),
+ /*FallbackScope=*/{},
/*Name=*/Name);
if (ALResult)
return std::move(ALResult);
@@ -8270,23 +8269,24 @@ TargetLowering *TL = nullptr;
Builder.SetInsertPoint(RetryBB);
// 1. Let the user code compute the new value.
- Value *OrigVal = Builder.CreateLoad(XElemTy, OrigPtr, Name + ".atomic.orig");
+ Value *OrigVal = Builder.CreateLoad(XElemTy, ExpectedOrActualPtr, Name + ".atomic.orig");
Expected<Value *> CBResult = UpdateOp(OrigVal, Builder);
if (!CBResult)
return CBResult.takeError();
Value *UpdVal = *CBResult;
- Builder.CreateStore(UpdVal, UpdPtr);
+ Builder.CreateStore(UpdVal, DesiredPtr);
// 2. AtomicCompareExchange to replace OrigVal with UpdVal.
Expected<Value *> ACEResult = emitAtomicCompareExchangeBuiltin(
/*Ptr=*/X,
- /*ExpectedPtr=*/OrigPtr,
- /*DesiredPtr=*/UpdPtr,
+ /*ExpectedPtr=*/ExpectedOrActualPtr,
+ /*DesiredPtr=*/DesiredPtr,
/*IsWeak=*/true,
/*IsVolatile=*/false,
/*SuccessMemorder=*/AO,
/*FailureMemorder=*/{},
- /*ActualPtr=*/OrigPtr,
+ /*SyncScope=*/SyncScope::System,
+ /*ActualPtr=*/ExpectedOrActualPtr,
/*DataTy=*/XElemTy,
/*DataSize=*/{},
/*AvailableSize=*/{},
@@ -8295,6 +8295,8 @@ TargetLowering *TL = nullptr;
/*DL=*/DL,
/*TLI=*/&TLI,
/*TL=*/nullptr,
+ /*SyncScopes=*/{},
+ /*FallbackScope=*/{},
/*Name=*/Name);
if (!ACEResult)
return ACEResult.takeError();
@@ -8321,7 +8323,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
- Type *XElemTy = X.ElemTy;
assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
"OpenMP atomic does not support LT or GT operations");
});
@@ -8342,8 +8343,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
return Builder.saveIP();
}
-OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCompare(
- const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V,
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
+ const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
bool IsFailOnly) {
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index d9860b623bdd14..69dcd9e4c53338 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -72,14 +72,11 @@ class AtomicEmitter {
public:
AtomicEmitter(
Value *Ptr,
- // Value *ExpectedPtr,
- // Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
FailureMemorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope,
- // Value *ActualPtr,
Type *DataTy, std::optional<uint64_t> DataSize,
std::optional<uint64_t> AvailableSize, MaybeAlign Align,
IRBuilderBase &Builder, const DataLayout &DL,
@@ -88,7 +85,7 @@ class AtomicEmitter {
StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall)
: Ctx(Builder.getContext()), CurFn(Builder.GetInsertBlock()->getParent()),
- Ptr(Ptr), IsWeak(IsWeak), IsVolatile(IsVolatile),
+ AtomicPtr(Ptr), IsWeak(IsWeak), IsVolatile(IsVolatile),
SuccessMemorder(SuccessMemorder), FailureMemorder(FailureMemorder),
Scope(Scope), DataTy(DataTy), DataSize(DataSize),
AvailableSize(AvailableSize), Align(Align), Builder(Builder), DL(DL),
@@ -101,7 +98,7 @@ class AtomicEmitter {
LLVMContext &Ctx;
Function *CurFn;
- Value *Ptr;
+ Value *AtomicPtr;
std::variant<Value *, bool> IsWeak;
bool IsVolatile;
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder;
@@ -136,8 +133,7 @@ class AtomicEmitter {
Value *ScopeVal;
std::optional<bool> IsWeakConst;
Value *IsWeakVal;
- // Value *ExpectedVal;
- // Value *DesiredVal;
+
BasicBlock *createBasicBlock(const Twine &BBName) {
return BasicBlock::Create(Ctx, Name + BBName, CurFn);
@@ -378,7 +374,7 @@ class AtomicEmitter {
virtual Expected<Value *> makeFallbackError() = 0;
Expected<Value *> emit(bool CoerceType = false) {
- assert(Ptr->getType()->isPointerTy());
+ assert(AtomicPtr->getType()->isPointerTy());
assert(TLI);
unsigned MaxAtomicSizeSupported = 16;
@@ -426,7 +422,7 @@ class AtomicEmitter {
//
// We prefer safety here and assume no alignment, unless
// getPointerAlignment() can determine the actual alignment.
- EffectiveAlign = Ptr->getPointerAlignment(DL);
+ EffectiveAlign = AtomicPtr->getPointerAlignment(DL);
}
// Only use the original data type if it is compatible with the atomic instruction (and sized
@@ -626,7 +622,7 @@ class AtomicLoadEmitter final : public AtomicEmitter {
AtomicOrdering SuccessMemorder,
AtomicOrdering FailureMemorder) override {
LoadInst *AtomicInst =
- Builder.CreateLoad(CoercedTy, Ptr, IsVolatile, Name + ".atomic.load");
+ Builder.CreateLoad(CoercedTy, AtomicPtr, IsVolatile, Name + ".atomic.load");
AtomicInst->setAtomic(SuccessMemorder, Scope);
AtomicInst->setAlignment(EffectiveAlign);
AtomicInst->setVolatile(IsVolatile);
@@ -638,7 +634,7 @@ class AtomicLoadEmitter final : public AtomicEmitter {
}
Expected<Value *> emitSizedLibcall() override {
- Value *LoadResult = emitAtomicLoadN(PreferredSize, Ptr, SuccessMemorderCABI,
+ Value *LoadResult = emitAtomicLoadN(PreferredSize, AtomicPtr, SuccessMemorderCABI,
Builder, DL, TLI);
LoadResult->setName(Name);
if (LoadResult) {
@@ -662,7 +658,7 @@ class AtomicLoadEmitter final : public AtomicEmitter {
Value *DataSizeVal =
ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
- Value *LoadCall = emitAtomicLoad(DataSizeVal, Ptr, RetPtr,
+ Value *LoadCall = emitAtomicLoad(DataSizeVal, AtomicPtr, RetPtr,
SuccessMemorderCABI, Builder, DL, TLI);
if (LoadCall) {
LoadCall->setName(Name);
@@ -701,7 +697,7 @@ class AtomicStoreEmitter final : public AtomicEmitter {
Value *emitInst(bool IsWeak, SyncScope::ID Scope,
AtomicOrdering SuccessMemorder,
AtomicOrdering FailureMemorder) override {
- StoreInst *AtomicInst = Builder.CreateStore(Val, Ptr, IsVolatile);
+ StoreInst *AtomicInst = Builder.CreateStore(Val, AtomicPtr, IsVolatile);
AtomicInst->setAtomic(SuccessMemorder, Scope);
AtomicInst->setAlignment(EffectiveAlign);
AtomicInst->setVolatile(IsVolatile);
@@ -710,7 +706,7 @@ class AtomicStoreEmitter final : public AtomicEmitter {
Expected<Value *> emitSizedLibcall() override {
Val = Builder.CreateLoad(CoercedTy, ValPtr, Name + ".atomic.val");
- Value *StoreCall = emitAtomicStoreN(DataSizeConst, Ptr, Val,
+ Value *StoreCall = emitAtomicStoreN(DataSizeConst, AtomicPtr, Val,
SuccessMemorderCABI, Builder, DL, TLI);
StoreCall->setName(Name);
if (StoreCall)
@@ -732,7 +728,7 @@ class AtomicStoreEmitter final : public AtomicEmitter {
Value *DataSizeVal =
ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst);
- Value *StoreCall = emitAtomicStore(DataSizeVal, Ptr, ValPtr,
+ Value *StoreCall = emitAtomicStore(DataSizeVal, AtomicPtr, ValPtr,
SuccessMemorderCABI, Builder, DL, TLI);
if (StoreCall)
return nullptr;
@@ -757,9 +753,9 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
assert(ExpectedPtr->getType()->isPointerTy());
assert(DesiredPtr->getType()->isPointerTy());
assert(!ActualPtr || ActualPtr->getType()->isPointerTy());
- assert(Ptr != ExpectedPtr);
- assert(Ptr != DesiredPtr);
- assert(Ptr != ActualPtr);
+ assert(AtomicPtr != ExpectedPtr);
+ assert(AtomicPtr != DesiredPtr);
+ assert(AtomicPtr != ActualPtr);
assert(ActualPtr != DesiredPtr);
this->ExpectedPtr = ExpectedPtr;
@@ -786,7 +782,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
AtomicOrdering SuccessMemorder,
AtomicOrdering FailureMemorder) override {
AtomicCmpXchgInst *AtomicInst =
- Builder.CreateAtomicCmpXchg(Ptr, ExpectedVal, DesiredVal, Align,
+ Builder.CreateAtomicCmpXchg(AtomicPtr, ExpectedVal, DesiredVal, Align,
SuccessMemorder, FailureMemorder, Scope);
AtomicInst->setName(Name + ".cmpxchg.pair");
AtomicInst->setAlignment(EffectiveAlign);
@@ -810,7 +806,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
Builder.CreateLoad(IntegerType::get(Ctx, PreferredSize * 8), DesiredPtr,
Name + ".cmpxchg.desired");
Value *SuccessResult = emitAtomicCompareExchangeN(
- PreferredSize, Ptr, ExpectedPtr, DesiredVal, SuccessMemorderCABI,
+ PreferredSize, AtomicPtr, ExpectedPtr, DesiredVal, SuccessMemorderCABI,
FailureMemorderCABI, Builder, DL, TLI);
if (SuccessResult) {
Value *SuccessBool =
@@ -834,7 +830,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
// FIXME: Some AMDGCN regression tests use the addrspace, but
// __atomic_compare_exchange by definition is addrspace(0) and
// emitAtomicCompareExchange will complain about it.
- if (Ptr->getType()->getPointerAddressSpace() ||
+ if (AtomicPtr->getType()->getPointerAddressSpace() ||
ExpectedPtr->getType()->getPointerAddressSpace() ||
DesiredPtr->getType()->getPointerAddressSpace())
return Builder.getInt1(false);
@@ -847,7 +843,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
// __atomic_compare_exchange is not supported. In either case there is no
// fallback for atomics not supported by the target and we have to crash.
Value *SuccessResult = emitAtomicCompareExchange(
- ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst), Ptr,
+ ConstantInt::get(getSizeTTy(Builder, TLI), DataSizeConst), AtomicPtr,
ExpectedPtr, DesiredPtr, SuccessMemorderCABI, FailureMemorderCABI,
Builder, DL, TLI);
if (SuccessResult) {
@@ -874,7 +870,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
} // namespace
Error llvm::emitAtomicLoadBuiltin(
- Value *Ptr, Value *RetPtr,
+ Value *AtomicPtr, Value *RetPtr,
// std::variant<Value *, bool> IsWeak,
bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
@@ -886,15 +882,14 @@ Error llvm::emitAtomicLoadBuiltin(
StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
AtomicLoadEmitter Emitter(
- Ptr, false, IsVolatile, Memorder, {}, Scope, DataTy, DataSize,
+ AtomicPtr, false, IsVolatile, Memorder, {}, Scope, DataTy, DataSize,
AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes, FallbackScope,
Name, AllowInstruction, AllowSwitch, AllowSizedLibcall, AllowLibcall);
return Emitter.emitLoad(RetPtr);
}
Error llvm::emitAtomicStoreBuiltin(
- Value *Ptr, Value *ValPtr,
- // std::variant<Value *, bool> IsWeak,
+ Value *AtomicPtr, Value *ValPtr,
bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
@@ -906,14 +901,14 @@ Error llvm::emitAtomicStoreBuiltin(
bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
AtomicStoreEmitter Emitter(
- Ptr, false, IsVolatile, Memorder, {}, Scope, DataTy, DataSize,
+ AtomicPtr, false, IsVolatile, Memorder, {}, Scope, DataTy, DataSize,
AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes, FallbackScope,
Name, AllowInstruction, AllowSwitch, AllowSizedLibcall, AllowLibcall);
return Emitter.emitStore(ValPtr);
}
Expected<Value *> llvm::emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
+ Value *AtomicPtr, Value *ExpectedPtr, Value *DesiredPtr,
std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
@@ -927,27 +922,10 @@ Expected<Value *> llvm::emitAtomicCompareExchangeBuiltin(
StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
AtomicCompareExchangeEmitter Emitter(
- Ptr, IsWeak, IsVolatile, SuccessMemorder, FailureMemorder, Scope, DataTy,
+ AtomicPtr, IsWeak, IsVolatile, SuccessMemorder, FailureMemorder, Scope, DataTy,
DataSize, AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes,
FallbackScope, Name, AllowInstruction, AllowSwitch, AllowSizedLibcall,
AllowLibcall);
return Emitter.emitCmpXchg(ExpectedPtr, DesiredPtr, PrevPtr);
}
-Expected<Value *> llvm::emitAtomicCompareExchangeBuiltin(
- Value *Ptr, Value *ExpectedPtr, Value *DesiredPtr,
- std::variant<Value *, bool> IsWeak, bool IsVolatile,
- std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
- std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
- FailureMemorder,
- Value *PrevPtr, Type *DataTy, std::optional<uint64_t> DataSize,
- std::optional<uint64_t> AvailableSize, MaybeAlign Align,
- IRBuilderBase &Builder, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const TargetLowering *TL, llvm::Twine Name, bool AllowInstruction,
- bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
- return emitAtomicCompareExchangeBuiltin(
- Ptr, ExpectedPtr, DesiredPtr, IsWeak, IsVolatile, SuccessMemorder,
- FailureMemorder, SyncScope::System, PrevPtr, DataTy, DataSize,
- AvailableSize, Align, Builder, DL, TLI, TL, {}, StringRef(), Name,
- AllowInstruction, AllowSwitch, AllowSizedLibcall, AllowLibcall);
-}
>From 4d199251cec3939826cb7cfa22844953359568e3 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 21:55:44 +0100
Subject: [PATCH 14/17] remove unused variables
---
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 5 -----
1 file changed, 5 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 403b5c7b29569c..9bf66431ef2d33 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1822,7 +1822,6 @@ Value *llvm::emitAtomicLoad(Value *Size, Value *Ptr, Value *Ret,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Type *VoidTy = B.getVoidTy();
- Type *BoolTy = B.getInt8Ty();
Type *SizeTTy = getSizeTTy(B, TLI);
Type *PtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
@@ -1857,8 +1856,6 @@ Value *llvm::emitAtomicLoadN(size_t Size, Value *Ptr, Value *Memorder,
return nullptr;
}
- Type *VoidTy = B.getVoidTy();
- Type *BoolTy = B.getInt8Ty();
Type *PtrTy = B.getPtrTy();
Type *ValTy = B.getIntNTy(Size * 8);
Type *IntTy = getIntTy(B, TLI);
@@ -1871,7 +1868,6 @@ Value *llvm::emitAtomicStore(Value *Size, Value *Ptr, Value *ValPtr,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Type *VoidTy = B.getVoidTy();
- Type *BoolTy = B.getInt8Ty();
Type *SizeTTy = getSizeTTy(B, TLI);
Type *PtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
@@ -1908,7 +1904,6 @@ Value *llvm::emitAtomicStoreN(size_t Size, Value *Ptr, Value *Val,
}
Type *VoidTy = B.getVoidTy();
- Type *BoolTy = B.getInt8Ty();
Type *PtrTy = B.getPtrTy();
Type *ValTy = B.getIntNTy(Size * 8);
Type *IntTy = getIntTy(B, TLI);
>From 479650adca64d2838396715e924032bcd91ff049 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 21:57:25 +0100
Subject: [PATCH 15/17] clang-format
---
.../llvm/Transforms/Utils/BuildBuiltins.h | 10 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 32 +-
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 403 +++++++++---------
.../Frontend/OpenMPIRBuilderTest.cpp | 210 ++++-----
4 files changed, 328 insertions(+), 327 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index e8291e963e8aa2..c479ba4123ab78 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -146,9 +146,13 @@ Error emitAtomicStoreBuiltin(
/// @param ActualPtr (optional) Receives the value at \p Ptr before the atomic
/// exchange is attempted. This means:
/// In case of success:
-/// The value at \p Ptr before the update. That is, the value passed behind \p ExpectedPtr.
-/// In case of failure (including spurious failures if IsWeak):
-/// The current value at \p Ptr, i.e. the operation effectively was an atomic load of that value using FailureMemorder semantics.
+/// The value at \p Ptr before the update. That is, the
+/// value passed behind \p ExpectedPtr.
+/// In case of failure (including spurious failures if
+/// IsWeak):
+/// The current value at \p Ptr, i.e. the operation
+/// effectively was an atomic load of that value using
+/// FailureMemorder semantics.
/// @param DataSize Number of bytes to be exchanged.
/// @param AvailableSize The total size that can be used for the atomic
/// operation. It may include trailing padding in addition to
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index f83426460f0fee..e3ab01d1476b00 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8045,9 +8045,9 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
TargetLibraryInfoImpl TLII(T);
TargetLibraryInfo TLI(TLII);
-TargetLowering *TL = nullptr;
+ TargetLowering *TL = nullptr;
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Twine Name=X.Var->getName();
+ Twine Name = X.Var->getName();
Error ALResult =
emitAtomicLoadBuiltin(X.Var,
@@ -8081,7 +8081,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
if (!updateToLocation(Loc))
return Loc.IP;
- assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
+ assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
assert(X.Var->getType()->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
Type *XElemTy = X.ElemTy;
@@ -8090,14 +8090,14 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
TargetLibraryInfoImpl TLII(T);
TargetLibraryInfo TLI(TLII);
-TargetLowering *TL = nullptr;
+ TargetLowering *TL = nullptr;
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Twine Name=X.Var->getName();
+ Twine Name = X.Var->getName();
// Reserve some stack space.
InsertPointTy ContIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
- Value * ValPtr = Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.val");
+ Value *ValPtr = Builder.CreateAlloca(XElemTy, nullptr, Name + ".atomic.val");
Builder.restoreIP(ContIP);
Builder.CreateStore(Expr, ValPtr);
@@ -8226,11 +8226,10 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
Triple T(Builder.GetInsertBlock()->getModule()->getTargetTriple());
TargetLibraryInfoImpl TLII(T);
TargetLibraryInfo TLI(TLII);
-TargetLowering *TL = nullptr;
+ TargetLowering *TL = nullptr;
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Twine Name(X->getName());
-
// Reserve some stack space.
InsertPointTy InitIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
@@ -8242,7 +8241,7 @@ TargetLowering *TL = nullptr;
// Old value for first transaction. Every followup-transaction will use the
// prev value from cmpxchg.
- Error ALResult = emitAtomicLoadBuiltin(/*AtomicPtr*/X,
+ Error ALResult = emitAtomicLoadBuiltin(/*AtomicPtr*/ X,
/*RetPtr=*/ExpectedOrActualPtr,
/*IsVolatile=*/false,
/*Memorder=*/AO,
@@ -8263,13 +8262,14 @@ TargetLowering *TL = nullptr;
// Create new CFG.
BasicBlock *DoneBB = splitBBWithSuffix(Builder, false, ".atomic.done");
- BasicBlock *RetryBB = splitBBWithSuffix(Builder, true, ".atomic.retry");
+ BasicBlock *RetryBB = splitBBWithSuffix(Builder, true, ".atomic.retry");
// Emit the update transaction...
Builder.SetInsertPoint(RetryBB);
// 1. Let the user code compute the new value.
- Value *OrigVal = Builder.CreateLoad(XElemTy, ExpectedOrActualPtr, Name + ".atomic.orig");
+ Value *OrigVal =
+ Builder.CreateLoad(XElemTy, ExpectedOrActualPtr, Name + ".atomic.orig");
Expected<Value *> CBResult = UpdateOp(OrigVal, Builder);
if (!CBResult)
return CBResult.takeError();
@@ -8285,8 +8285,8 @@ TargetLowering *TL = nullptr;
/*IsVolatile=*/false,
/*SuccessMemorder=*/AO,
/*FailureMemorder=*/{},
- /*SyncScope=*/SyncScope::System,
- /*ActualPtr=*/ExpectedOrActualPtr,
+ /*SyncScope=*/SyncScope::System,
+ /*ActualPtr=*/ExpectedOrActualPtr,
/*DataTy=*/XElemTy,
/*DataSize=*/{},
/*AvailableSize=*/{},
@@ -8296,14 +8296,14 @@ TargetLowering *TL = nullptr;
/*TLI=*/&TLI,
/*TL=*/nullptr,
/*SyncScopes=*/{},
- /*FallbackScope=*/{},
+ /*FallbackScope=*/{},
/*Name=*/Name);
if (!ACEResult)
return ACEResult.takeError();
Value *Success = *ACEResult;
// 3. Repeat transaction until successful.
- Builder.CreateCondBr(Success, DoneBB, RetryBB);
+ Builder.CreateCondBr(Success, DoneBB, RetryBB);
// Continue with user code when the update transaction was successful.
Builder.SetInsertPoint(DoneBB);
@@ -8344,7 +8344,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
- const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
+ const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
bool IsFailOnly) {
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 69dcd9e4c53338..3c79866a271e0d 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -71,15 +71,13 @@ constexpr bool holds_alternative_if_exists(const Variant &v) {
class AtomicEmitter {
public:
AtomicEmitter(
- Value *Ptr,
- std::variant<Value *, bool> IsWeak, bool IsVolatile,
+ Value *Ptr, std::variant<Value *, bool> IsWeak, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> SuccessMemorder,
std::variant<std::monostate, Value *, AtomicOrdering, AtomicOrderingCABI>
FailureMemorder,
- std::variant<Value *, SyncScope::ID, StringRef> Scope,
- Type *DataTy, std::optional<uint64_t> DataSize,
- std::optional<uint64_t> AvailableSize, MaybeAlign Align,
- IRBuilderBase &Builder, const DataLayout &DL,
+ std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
+ std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
+ MaybeAlign Align, IRBuilderBase &Builder, const DataLayout &DL,
const TargetLibraryInfo *TLI, const TargetLowering *TL,
ArrayRef<std::pair<uint32_t, StringRef>> SyncScopes,
StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
@@ -134,7 +132,6 @@ class AtomicEmitter {
std::optional<bool> IsWeakConst;
Value *IsWeakVal;
-
BasicBlock *createBasicBlock(const Twine &BBName) {
return BasicBlock::Create(Ctx, Name + BBName, CurFn);
};
@@ -391,217 +388,219 @@ class AtomicEmitter {
assert(DataSizeConst <= AvailableSizeConst);
#ifndef NDEBUG
- if (DataTy) {
- // 'long double' (80-bit extended precision) behaves strange here.
- // DL.getTypeStoreSize says it is 10 bytes
- // Clang says it is 12 bytes
- // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that does
- // not support floats, so AtomicExpandPass doesn't even know it originally
- // was an FP80)
- TypeSize DS = DL.getTypeStoreSize(DataTy);
- assert(DS.getKnownMinValue() <= DataSizeConst &&
- "Must access at least all the relevant bits of the data, possibly "
- "some more for padding");
- }
+ if (DataTy) {
+ // 'long double' (80-bit extended precision) behaves strangely here.
+ // DL.getTypeStoreSize says it is 10 bytes
+ // Clang says it is 12 bytes
+ // AtomicExpandPass would disagree with CGAtomic (not for cmpxchg that
+ // does not support floats, so AtomicExpandPass doesn't even know it
+ // originally was an FP80)
+ TypeSize DS = DL.getTypeStoreSize(DataTy);
+ assert(DS.getKnownMinValue() <= DataSizeConst &&
+ "Must access at least all the relevant bits of the data, possibly "
+ "some more for padding");
+ }
#endif
- Type *IntTy = getIntTy(Builder, TLI);
+ Type *IntTy = getIntTy(Builder, TLI);
- PreferredSize = PowerOf2Ceil(DataSizeConst);
- if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
- PreferredSize = DataSizeConst;
+ PreferredSize = PowerOf2Ceil(DataSizeConst);
+ if (!PreferredSize || PreferredSize > MaxAtomicSizeSupported)
+ PreferredSize = DataSizeConst;
- if (Align) {
- EffectiveAlign = *Align;
- } else {
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // The alignment is only optional when parsing textual IR; for in-memory
- // IR, it is always present. If unspecified, the alignment is assumed to
- // be equal to the size of the ‘<value>’ type.
- //
- // We prefer safety here and assume no alignment, unless
- // getPointerAlignment() can determine the actual alignment.
- EffectiveAlign = AtomicPtr->getPointerAlignment(DL);
- }
+ if (Align) {
+ EffectiveAlign = *Align;
+ } else {
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // The alignment is only optional when parsing textual IR; for in-memory
+ // IR, it is always present. If unspecified, the alignment is assumed to
+ // be equal to the size of the ‘<value>’ type.
+ //
+ // We prefer safety here and assume no alignment, unless
+ // getPointerAlignment() can determine the actual alignment.
+ EffectiveAlign = AtomicPtr->getPointerAlignment(DL);
+ }
- // Only use the original data type if it is compatible with the atomic instruction (and sized
- // libcall function) and matches the preferred size. No type punning needed
- // when the libcall function only takes pointers.
- CoercedTy = DataTy;
- // If we have rounded-up the data size, unconditionally coerce to a different type.
- if (DataSizeConst != PreferredSize)
- CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
- if (CoerceType) {
- if (DataTy && DataSizeConst == PreferredSize &&
- (DataTy->isIntegerTy() || DataTy->isPointerTy()))
- CoercedTy = DataTy;
- else if (PreferredSize <= 16)
- CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
-}
+ // Only use the original data type if it is compatible with the atomic
+ // instruction (and sized libcall function) and matches the preferred size.
+ // No type punning needed when the libcall function only takes pointers.
+ CoercedTy = DataTy;
+ // If we have rounded-up the data size, unconditionally coerce to a
+ // different type.
+ if (DataSizeConst != PreferredSize)
+ CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+ if (CoerceType) {
+ if (DataTy && DataSizeConst == PreferredSize &&
+ (DataTy->isIntegerTy() || DataTy->isPointerTy()))
+ CoercedTy = DataTy;
+ else if (PreferredSize <= 16)
+ CoercedTy = IntegerType::get(Ctx, PreferredSize * 8);
+ }
- // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
- // constant, determine the AtomicOrdering for use with the cmpxchg
- // instruction. Also determines the llvm::Value to be passed to
- // __atomic_compare_exchange in case cmpxchg is not legal.
- auto processMemorder = [&](auto MemorderVariant)
- -> std::pair<std::optional<AtomicOrdering>, Value *> {
- if (holds_alternative_if_exists<std::monostate>(MemorderVariant)) {
- // Derive FailureMemorder from SucccessMemorder
- if (SuccessMemorderConst) {
- AtomicOrdering MOFailure =
- AtomicCmpXchgInst::getStrongestFailureOrdering(
- *SuccessMemorderConst);
- MemorderVariant = MOFailure;
+ // For resolving the SuccessMemorder/FailureMemorder arguments. If it is
+ // constant, determine the AtomicOrdering for use with the cmpxchg
+ // instruction. Also determines the llvm::Value to be passed to
+ // __atomic_compare_exchange in case cmpxchg is not legal.
+ auto processMemorder = [&](auto MemorderVariant)
+ -> std::pair<std::optional<AtomicOrdering>, Value *> {
+ if (holds_alternative_if_exists<std::monostate>(MemorderVariant)) {
+ // Derive FailureMemorder from SuccessMemorder
+ if (SuccessMemorderConst) {
+ AtomicOrdering MOFailure =
+ AtomicCmpXchgInst::getStrongestFailureOrdering(
+ *SuccessMemorderConst);
+ MemorderVariant = MOFailure;
+ }
}
- }
- if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
- auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
- return std::make_pair(
- Memorder,
- ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
- }
+ if (std::holds_alternative<AtomicOrdering>(MemorderVariant)) {
+ auto Memorder = std::get<AtomicOrdering>(MemorderVariant);
+ return std::make_pair(
+ Memorder,
+ ConstantInt::get(IntTy, static_cast<uint64_t>(toCABI(Memorder))));
+ }
- if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
- auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
- return std::make_pair(
- fromCABI(MemorderCABI),
- ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
- }
+ if (std::holds_alternative<AtomicOrderingCABI>(MemorderVariant)) {
+ auto MemorderCABI = std::get<AtomicOrderingCABI>(MemorderVariant);
+ return std::make_pair(
+ fromCABI(MemorderCABI),
+ ConstantInt::get(IntTy, static_cast<uint64_t>(MemorderCABI)));
+ }
- auto *MemorderCABI = std::get<Value *>(MemorderVariant);
- if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
- uint64_t MOInt = MO->getZExtValue();
- return std::make_pair(fromCABI(MOInt), MO);
- }
+ auto *MemorderCABI = std::get<Value *>(MemorderVariant);
+ if (auto *MO = dyn_cast<ConstantInt>(MemorderCABI)) {
+ uint64_t MOInt = MO->getZExtValue();
+ return std::make_pair(fromCABI(MOInt), MO);
+ }
- return std::make_pair(std::nullopt, MemorderCABI);
- };
+ return std::make_pair(std::nullopt, MemorderCABI);
+ };
- auto processIsWeak =
- [&](auto WeakVariant) -> std::pair<std::optional<bool>, Value *> {
- if (std::holds_alternative<bool>(WeakVariant)) {
- bool IsWeakBool = std::get<bool>(WeakVariant);
- return std::make_pair(IsWeakBool, Builder.getInt1(IsWeakBool));
- }
+ auto processIsWeak =
+ [&](auto WeakVariant) -> std::pair<std::optional<bool>, Value *> {
+ if (std::holds_alternative<bool>(WeakVariant)) {
+ bool IsWeakBool = std::get<bool>(WeakVariant);
+ return std::make_pair(IsWeakBool, Builder.getInt1(IsWeakBool));
+ }
- auto *BoolVal = std::get<Value *>(WeakVariant);
- if (auto *BoolConst = dyn_cast<ConstantInt>(BoolVal)) {
- uint64_t IsWeakBool = BoolConst->getZExtValue();
- return std::make_pair(IsWeakBool != 0, BoolVal);
- }
+ auto *BoolVal = std::get<Value *>(WeakVariant);
+ if (auto *BoolConst = dyn_cast<ConstantInt>(BoolVal)) {
+ uint64_t IsWeakBool = BoolConst->getZExtValue();
+ return std::make_pair(IsWeakBool != 0, BoolVal);
+ }
- return std::make_pair(std::nullopt, BoolVal);
- };
+ return std::make_pair(std::nullopt, BoolVal);
+ };
- auto processScope = [&](auto ScopeVariant)
- -> std::pair<std::optional<SyncScope::ID>, Value *> {
- if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
- auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
- return std::make_pair(ScopeID, nullptr);
- }
+ auto processScope = [&](auto ScopeVariant)
+ -> std::pair<std::optional<SyncScope::ID>, Value *> {
+ if (std::holds_alternative<SyncScope::ID>(ScopeVariant)) {
+ auto ScopeID = std::get<SyncScope::ID>(ScopeVariant);
+ return std::make_pair(ScopeID, nullptr);
+ }
- if (std::holds_alternative<StringRef>(ScopeVariant)) {
- auto ScopeName = std::get<StringRef>(ScopeVariant);
- SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
- return std::make_pair(ScopeID, nullptr);
- }
+ if (std::holds_alternative<StringRef>(ScopeVariant)) {
+ auto ScopeName = std::get<StringRef>(ScopeVariant);
+ SyncScope::ID ScopeID = Ctx.getOrInsertSyncScopeID(ScopeName);
+ return std::make_pair(ScopeID, nullptr);
+ }
- auto *IntVal = std::get<Value *>(ScopeVariant);
- if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
- uint64_t ScopeVal = InstConst->getZExtValue();
- return std::make_pair(ScopeVal, IntVal);
- }
+ auto *IntVal = std::get<Value *>(ScopeVariant);
+ if (auto *InstConst = dyn_cast<ConstantInt>(IntVal)) {
+ uint64_t ScopeVal = InstConst->getZExtValue();
+ return std::make_pair(ScopeVal, IntVal);
+ }
- return std::make_pair(std::nullopt, IntVal);
- };
+ return std::make_pair(std::nullopt, IntVal);
+ };
- std::tie(IsWeakConst, IsWeakVal) = processIsWeak(IsWeak);
- std::tie(SuccessMemorderConst, SuccessMemorderCABI) =
- processMemorder(SuccessMemorder);
- std::tie(FailureMemorderConst, FailureMemorderCABI) =
- processMemorder(FailureMemorder);
- std::tie(ScopeConst, ScopeVal) = processScope(Scope);
-
- // Fix malformed inputs. We do not want to emit illegal IR.
- //
- // https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
- //
- // [failure_memorder] This memory order cannot be __ATOMIC_RELEASE nor
- // __ATOMIC_ACQ_REL. It also cannot be a stronger order than that
- // specified by success_memorder.
- //
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // Both ordering parameters must be at least monotonic, the failure
- // ordering cannot be either release or acq_rel.
- //
- if (FailureMemorderConst &&
- ((*FailureMemorderConst == AtomicOrdering::Release) ||
- (*FailureMemorderConst == AtomicOrdering::AcquireRelease))) {
- // Fall back to monotonic atomic when illegal value is passed. As with the
- // dynamic case below, it is an arbitrary choice.
- FailureMemorderConst = AtomicOrdering::Monotonic;
- }
- if (FailureMemorderConst && SuccessMemorderConst &&
- !isAtLeastOrStrongerThan(*SuccessMemorderConst, *FailureMemorderConst)) {
- // Make SuccessMemorder as least as strong as FailureMemorder
- SuccessMemorderConst =
- getMergedAtomicOrdering(*SuccessMemorderConst, *FailureMemorderConst);
- }
+ std::tie(IsWeakConst, IsWeakVal) = processIsWeak(IsWeak);
+ std::tie(SuccessMemorderConst, SuccessMemorderCABI) =
+ processMemorder(SuccessMemorder);
+ std::tie(FailureMemorderConst, FailureMemorderCABI) =
+ processMemorder(FailureMemorder);
+ std::tie(ScopeConst, ScopeVal) = processScope(Scope);
- // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
- //
- // The type of ‘<cmp>’ must be an integer or pointer type whose bit width is
- // a power of two greater than or equal to eight and less than or equal to a
- // target-specific size limit.
- bool CanUseInst = PreferredSize <= MaxAtomicSizeSupported &&
- llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
- bool CanUseSingleInst = CanUseInst && SuccessMemorderConst &&
- FailureMemorderConst && IsWeakConst && ScopeConst;
- bool CanUseSizedLibcall =
- canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
- ScopeConst == SyncScope::System;
- bool CanUseLibcall = ScopeConst == SyncScope::System;
-
- if (CanUseSingleInst && AllowInstruction) {
- prepareInst();
-
- return emitInst(*IsWeakConst, *ScopeConst, *SuccessMemorderConst,
- *FailureMemorderConst);
- }
+ // Fix malformed inputs. We do not want to emit illegal IR.
+ //
+ // https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+ //
+ // [failure_memorder] This memory order cannot be __ATOMIC_RELEASE nor
+ // __ATOMIC_ACQ_REL. It also cannot be a stronger order than that
+ // specified by success_memorder.
+ //
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // Both ordering parameters must be at least monotonic, the failure
+ // ordering cannot be either release or acq_rel.
+ //
+ if (FailureMemorderConst &&
+ ((*FailureMemorderConst == AtomicOrdering::Release) ||
+ (*FailureMemorderConst == AtomicOrdering::AcquireRelease))) {
+ // Fall back to monotonic atomic when illegal value is passed. As with the
+ // dynamic case below, it is an arbitrary choice.
+ FailureMemorderConst = AtomicOrdering::Monotonic;
+ }
+ if (FailureMemorderConst && SuccessMemorderConst &&
+ !isAtLeastOrStrongerThan(*SuccessMemorderConst,
+ *FailureMemorderConst)) {
+ // Make SuccessMemorder at least as strong as FailureMemorder
+ SuccessMemorderConst =
+ getMergedAtomicOrdering(*SuccessMemorderConst, *FailureMemorderConst);
+ }
- // Switching only needed for cmpxchg instruction which requires constant
- // arguments.
- // FIXME: If AtomicExpandPass later considers the cmpxchg not lowerable for
- // the given target, it will also generate a call to the
- // __atomic_compare_exchange function. In that case the switching was very
- // unnecessary but cannot be undone.
- if (CanUseInst && AllowSwitch && AllowInstruction) {
- prepareInst();
- return emitWeakSwitch();
- }
+ // https://llvm.org/docs/LangRef.html#cmpxchg-instruction
+ //
+ // The type of ‘<cmp>’ must be an integer or pointer type whose bit width
+ // is a power of two greater than or equal to eight and less than or equal
+ // to a target-specific size limit.
+ bool CanUseInst = PreferredSize <= MaxAtomicSizeSupported &&
+ llvm::isPowerOf2_64(PreferredSize) && CoercedTy;
+ bool CanUseSingleInst = CanUseInst && SuccessMemorderConst &&
+ FailureMemorderConst && IsWeakConst && ScopeConst;
+ bool CanUseSizedLibcall =
+ canUseSizedAtomicCall(PreferredSize, EffectiveAlign, DL) &&
+ ScopeConst == SyncScope::System;
+ bool CanUseLibcall = ScopeConst == SyncScope::System;
+
+ if (CanUseSingleInst && AllowInstruction) {
+ prepareInst();
+
+ return emitInst(*IsWeakConst, *ScopeConst, *SuccessMemorderConst,
+ *FailureMemorderConst);
+ }
- // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
- // ignored. IsWeak is assumed to be false, Scope is assumed to be
- // SyncScope::System (strongest possible assumption synchronizing with
- // everything, instead of just a subset of sibling threads), and volatile
- // does not apply to function calls.
+ // Switching only needed for cmpxchg instruction which requires constant
+ // arguments.
+ // FIXME: If AtomicExpandPass later considers the cmpxchg not lowerable for
+ // the given target, it will also generate a call to the
+ // __atomic_compare_exchange function. In that case the switching was very
+ // unnecessary but cannot be undone.
+ if (CanUseInst && AllowSwitch && AllowInstruction) {
+ prepareInst();
+ return emitWeakSwitch();
+ }
- if (CanUseSizedLibcall && AllowSizedLibcall) {
- Expected<Value *> SizedLibcallResult = emitSizedLibcall();
- if (SizedLibcallResult)
- return SizedLibcallResult;
- }
+ // Fallback to a libcall function. From here on IsWeak/Scope/IsVolatile is
+ // ignored. IsWeak is assumed to be false, Scope is assumed to be
+ // SyncScope::System (strongest possible assumption synchronizing with
+ // everything, instead of just a subset of sibling threads), and volatile
+ // does not apply to function calls.
- if (CanUseLibcall && AllowLibcall) {
- Expected<Value *> LibcallResult = emitSizedLibcall();
- if (LibcallResult)
- return LibcallResult;
- }
+ if (CanUseSizedLibcall && AllowSizedLibcall) {
+ Expected<Value *> SizedLibcallResult = emitSizedLibcall();
+ if (SizedLibcallResult)
+ return SizedLibcallResult;
+ }
+
+ if (CanUseLibcall && AllowLibcall) {
+ Expected<Value *> LibcallResult = emitSizedLibcall();
+ if (LibcallResult)
+ return LibcallResult;
+ }
- return makeFallbackError();
+ return makeFallbackError();
}
};
@@ -621,8 +620,8 @@ class AtomicLoadEmitter final : public AtomicEmitter {
Value *emitInst(bool IsWeak, SyncScope::ID Scope,
AtomicOrdering SuccessMemorder,
AtomicOrdering FailureMemorder) override {
- LoadInst *AtomicInst =
- Builder.CreateLoad(CoercedTy, AtomicPtr, IsVolatile, Name + ".atomic.load");
+ LoadInst *AtomicInst = Builder.CreateLoad(CoercedTy, AtomicPtr, IsVolatile,
+ Name + ".atomic.load");
AtomicInst->setAtomic(SuccessMemorder, Scope);
AtomicInst->setAlignment(EffectiveAlign);
AtomicInst->setVolatile(IsVolatile);
@@ -634,8 +633,8 @@ class AtomicLoadEmitter final : public AtomicEmitter {
}
Expected<Value *> emitSizedLibcall() override {
- Value *LoadResult = emitAtomicLoadN(PreferredSize, AtomicPtr, SuccessMemorderCABI,
- Builder, DL, TLI);
+ Value *LoadResult = emitAtomicLoadN(PreferredSize, AtomicPtr,
+ SuccessMemorderCABI, Builder, DL, TLI);
LoadResult->setName(Name);
if (LoadResult) {
Builder.CreateStore(LoadResult, RetPtr);
@@ -761,7 +760,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
this->ExpectedPtr = ExpectedPtr;
this->DesiredPtr = DesiredPtr;
this->ActualPtr = ActualPtr;
- return emit(/*CoerceType*/true);
+ return emit(/*CoerceType*/ true);
}
protected:
@@ -889,8 +888,7 @@ Error llvm::emitAtomicLoadBuiltin(
}
Error llvm::emitAtomicStoreBuiltin(
- Value *AtomicPtr, Value *ValPtr,
- bool IsVolatile,
+ Value *AtomicPtr, Value *ValPtr, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
@@ -922,10 +920,9 @@ Expected<Value *> llvm::emitAtomicCompareExchangeBuiltin(
StringRef FallbackScope, llvm::Twine Name, bool AllowInstruction,
bool AllowSwitch, bool AllowSizedLibcall, bool AllowLibcall) {
AtomicCompareExchangeEmitter Emitter(
- AtomicPtr, IsWeak, IsVolatile, SuccessMemorder, FailureMemorder, Scope, DataTy,
- DataSize, AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes,
+ AtomicPtr, IsWeak, IsVolatile, SuccessMemorder, FailureMemorder, Scope,
+ DataTy, DataSize, AvailableSize, Align, Builder, DL, TLI, TL, SyncScopes,
FallbackScope, Name, AllowInstruction, AllowSwitch, AllowSizedLibcall,
AllowLibcall);
return Emitter.emitCmpXchg(ExpectedPtr, DesiredPtr, PrevPtr);
}
-
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index f3a779bdcc3cb6..58002b841824d4 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -43,8 +43,6 @@ using namespace omp;
namespace {
-
-
/// Create an instruction that uses the values in \p Values. We use "printf"
/// just because it is often used for this purpose in test code, but it is never
/// executed here.
@@ -176,101 +174,97 @@ static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
}
}
-
-static Value * followStoreLoad(Instruction *I, Value *V) {
+static Value *followStoreLoad(Instruction *I, Value *V) {
while (true) {
- Value *Addr;
+ Value *Addr;
while (true) {
- I= I->getPrevNode();
+ I = I->getPrevNode();
if (!I)
return V;
if (!isa<LoadInst>(I))
- continue ;
+ continue;
auto LoadI = cast<LoadInst>(I);
if (LoadI != V)
- continue ;
+ continue;
Addr = LoadI->getPointerOperand();
- V =nullptr;
+ V = nullptr;
break;
}
while (true) {
- I = I->getPrevNode();
+ I = I->getPrevNode();
if (!I)
return V;
- if (!isa<StoreInst>(I))
- continue ;
+ if (!isa<StoreInst>(I))
+ continue;
auto StoreI = cast<StoreInst>(I);
- if (StoreI->getPointerOperand() != Addr )
- continue ;
+ if (StoreI->getPointerOperand() != Addr)
+ continue;
V = StoreI->getValueOperand();
break;
}
}
}
+static SetVector<Value *> storedValues(Value *Val) {
+ SetVector<Value *> Vals;
+ if (!isa<LoadInst>(Val))
+ return Vals;
+ auto LD = cast<LoadInst>(Val);
-static SetVector<Value *> storedValues(Value *Val) {
- SetVector<Value *> Vals;
- if (!isa<LoadInst>(Val))
- return Vals;
- auto LD = cast<LoadInst>(Val);
+ DenseSet<Instruction *> Visited;
+ SmallVector<Value *> Addrs;
- DenseSet<Instruction *> Visited;
- SmallVector<Value*> Addrs;
-
- Addrs.push_back(LD->getPointerOperand());
+ Addrs.push_back(LD->getPointerOperand());
while (!Addrs.empty()) {
- auto Addr = Addrs.pop_back_val();
- auto AddrI = dyn_cast<Instruction>(Addr);
- if (!AddrI) continue ;
- if (Visited.contains(AddrI ))
- continue;
- Visited.insert(AddrI);
-
-
- for (auto &&U : AddrI->uses()) {
- if (auto S = dyn_cast<StoreInst>(U.getUser())) {
- assert(S->getPointerOperand() == AddrI);
- auto V = S->getValueOperand();
- if (auto ML = dyn_cast<LoadInst>(V))
- Addrs.push_back (ML->getPointerOperand() );
- else
- Vals.insert(V);
- } else
- if (auto L = dyn_cast<LoadInst>(U.getUser())) {
- Addrs.push_back(L->getPointerOperand());
- }
- }
+ auto Addr = Addrs.pop_back_val();
+ auto AddrI = dyn_cast<Instruction>(Addr);
+ if (!AddrI)
+ continue;
+ if (Visited.contains(AddrI))
+ continue;
+ Visited.insert(AddrI);
+
+ for (auto &&U : AddrI->uses()) {
+ if (auto S = dyn_cast<StoreInst>(U.getUser())) {
+ assert(S->getPointerOperand() == AddrI);
+ auto V = S->getValueOperand();
+ if (auto ML = dyn_cast<LoadInst>(V))
+ Addrs.push_back(ML->getPointerOperand());
+ else
+ Vals.insert(V);
+ } else if (auto L = dyn_cast<LoadInst>(U.getUser())) {
+ Addrs.push_back(L->getPointerOperand());
+ }
+ }
}
return Vals;
}
-
-
-static Value * followStorePtr(Value *Val) {
+static Value *followStorePtr(Value *Val) {
Value *V = Val;
- if (!isa<LoadInst>(Val))
- return V;
- auto LD = cast<LoadInst>(Val);
- auto Alloca = dyn_cast<AllocaInst>( LD->getPointerOperand());
- if (!Alloca)
- return V;
-
- auto STUse = [](Instruction *Addr) -> StoreInst *{
- for (auto &&U : Addr->uses())
- if (auto ST = dyn_cast<StoreInst>(U.getUser()))
- if (ST->getPointerOperand() == Addr && !isa<LoadInst>( ST->getValueOperand()))
- return ST;
- return nullptr;
- }(Alloca);
- return STUse;
+ if (!isa<LoadInst>(Val))
+ return V;
+ auto LD = cast<LoadInst>(Val);
+ auto Alloca = dyn_cast<AllocaInst>(LD->getPointerOperand());
+ if (!Alloca)
+ return V;
+
+ auto STUse = [](Instruction *Addr) -> StoreInst * {
+ for (auto &&U : Addr->uses())
+ if (auto ST = dyn_cast<StoreInst>(U.getUser()))
+ if (ST->getPointerOperand() == Addr &&
+ !isa<LoadInst>(ST->getValueOperand()))
+ return ST;
+ return nullptr;
+ }(Alloca);
+ return STUse;
}
-static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
- StoreInst *StoreofAtomic=nullptr;
+static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
+ StoreInst *StoreofAtomic = nullptr;
for (Instruction &Cur : *EntryBB) {
if (isa<StoreInst>(Cur)) {
StoreofAtomic = cast<StoreInst>(&Cur);
@@ -279,19 +273,17 @@ static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
StoreofAtomic = nullptr;
}
}
- return StoreofAtomic;
+ return StoreofAtomic;
}
-template <typename T> static T* findLastInstInBB(BasicBlock *BB) {
- for (Instruction &Cur : reverse(*BB)) {
- if (T *Candidate = dyn_cast<T>(&Cur))
+template <typename T> static T *findLastInstInBB(BasicBlock *BB) {
+ for (Instruction &Cur : reverse(*BB)) {
+ if (T *Candidate = dyn_cast<T>(&Cur))
return Candidate;
}
- return nullptr;
+ return nullptr;
}
-
-
class OpenMPIRBuilderTest : public testing::Test {
protected:
void SetUp() override {
@@ -3981,12 +3973,15 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
EXPECT_TRUE((bool)AfterWriteIP);
Builder.restoreIP(*AfterWriteIP);
-// IntegerType *IntCastTy = IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
+ // IntegerType *IntCastTy = IntegerType::get(M->getContext(),
+ // Float32->getScalarSizeInBits());
- // Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
+ // Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
-StoreInst *StoreofAtomic = findAtomicInst(OMPBuilder.getInsertionPoint().getBlock(), XVal);
- EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()), ValToWrite);
+ StoreInst *StoreofAtomic =
+ findAtomicInst(OMPBuilder.getInsertionPoint().getBlock(), XVal);
+ EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()),
+ ValToWrite);
EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
EXPECT_TRUE(StoreofAtomic->isAtomic());
@@ -4022,11 +4017,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
StoreInst *StoreofAtomic = findAtomicInst(EntryBB, XVal);
-
-
EXPECT_NE(StoreofAtomic, nullptr);
EXPECT_TRUE(StoreofAtomic->isAtomic());
- EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()), ValToWrite);
+ EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()),
+ ValToWrite);
Builder.CreateRetVoid();
OMPBuilder.finalize();
@@ -4062,10 +4056,11 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
Sub = IRB.CreateSub(ConstVal, Atomic);
return Sub;
};
- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(
+ Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
assert(AfterIP && "unexpected error");
Builder.restoreIP(*AfterIP);
- BasicBlock *DoneBB = Builder.GetInsertBlock();
+ BasicBlock *DoneBB = Builder.GetInsertBlock();
BasicBlock *ContBB = DoneBB->getSinglePredecessor();
BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
EXPECT_NE(ContTI, nullptr);
@@ -4074,11 +4069,11 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- //PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- //EXPECT_NE(Phi, nullptr);
- //EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- //EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- //EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
+ // PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
+ // EXPECT_NE(Phi, nullptr);
+ // EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
+ // EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
+ // EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
@@ -4086,11 +4081,13 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
EXPECT_NE(ExVI1, nullptr);
- AtomicCmpXchgInst *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
+ AtomicCmpXchgInst *CmpExchg =
+ dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_TRUE(storedValues(CmpExchg->getCompareOperand()).contains( ExpectedVal));
- EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains( Sub));
+ EXPECT_TRUE(
+ storedValues(CmpExchg->getCompareOperand()).contains(ExpectedVal));
+ EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains(Sub));
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
@@ -4143,24 +4140,25 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- //PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- //EXPECT_NE(Phi, nullptr);
- //EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- //EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- //EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
+ // PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
+ // EXPECT_NE(Phi, nullptr);
+ // EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
+ // EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
+ // EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
- ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
+ ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
EXPECT_NE(ExVI1, nullptr);
AtomicCmpXchgInst *CmpExchg =
dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_TRUE(storedValues(CmpExchg->getCompareOperand()).contains( ExpectedVal));
- EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains( Sub));
+ EXPECT_TRUE(
+ storedValues(CmpExchg->getCompareOperand()).contains(ExpectedVal));
+ EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains(Sub));
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
@@ -4212,23 +4210,25 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- //PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- //EXPECT_NE(Phi, nullptr);
- //EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- //EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- //EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
+ // PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
+ // EXPECT_NE(Phi, nullptr);
+ // EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
+ // EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
+ // EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
- ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
+ ExtractValueInst *ExVI1 = findLastInstInBB<ExtractValueInst>(ContBB);
EXPECT_NE(ExVI1, nullptr);
- AtomicCmpXchgInst *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
+ AtomicCmpXchgInst *CmpExchg =
+ dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
EXPECT_NE(CmpExchg, nullptr);
EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
- EXPECT_TRUE(storedValues(CmpExchg->getCompareOperand()).contains( ExpectedVal));
- EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains( Sub));
+ EXPECT_TRUE(
+ storedValues(CmpExchg->getCompareOperand()).contains(ExpectedVal));
+ EXPECT_TRUE(storedValues(CmpExchg->getNewValOperand()).contains(Sub));
EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
>From 12eaea5d6f42c1a8ab78dcab1a78b559a06f6b01 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 22:18:39 +0100
Subject: [PATCH 16/17] test fix
---
llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 9d1759af697dac..a9e5b8bbf67bfe 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -54,7 +54,7 @@
## the exact count first; the two directives should add up to that.
## Yes, this means additions to TLI will fail this test, but the argument
## to -COUNT can't be an expression.
-# AVAIL: TLI knows 538 symbols, 289 available
+# AVAIL: TLI knows 539 symbols, 289 available
# AVAIL-COUNT-289: {{^}} available
# AVAIL-NOT: {{^}} available
# UNAVAIL-COUNT-250: not available
>From fd3b5131b1bd08da36dba14ec6de527bd0b27c49 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 14 Nov 2024 22:47:30 +0100
Subject: [PATCH 17/17] Some cleanup
---
llvm/lib/Analysis/TargetLibraryInfo.cpp | 4 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 +-
llvm/lib/Transforms/Utils/BuildBuiltins.cpp | 9 +-
.../Frontend/OpenMPIRBuilderTest.cpp | 116 ++++++------------
4 files changed, 42 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 11790fdafee8b6..18dc49d9252f4b 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -1059,13 +1059,13 @@ static bool matchType(FuncArgTypeID ArgTy, const Type *Ty, unsigned IntBits,
return Ty->isIntegerTy() && Ty->getPrimitiveSizeInBits() >= IntBits;
case Int64:
return Ty->isIntegerTy(64);
+ case Int128:
+ return Ty->isIntegerTy(128);
case LLong:
return Ty->isIntegerTy(64);
case SizeT:
case SSizeT:
return Ty->isIntegerTy(SizeTBits);
- case Int128:
- return Ty->isIntegerTy(128);
case Flt:
return Ty->isFloatTy();
case Dbl:
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e3ab01d1476b00..a559ff81aa9646 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8189,7 +8189,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
- assert(XElemTy);
+ assert(XElemTy && "Argument must not be NULL");
bool emitRMWOp = false;
switch (RMWOp) {
@@ -8240,7 +8240,7 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
Builder.restoreIP(InitIP);
// Old value for first transaction. Every followup-transaction will use the
- // prev value from cmpxchg.
+ // actual value from cmpxchg.
Error ALResult = emitAtomicLoadBuiltin(/*AtomicPtr*/ X,
/*RetPtr=*/ExpectedOrActualPtr,
/*IsVolatile=*/false,
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 3c79866a271e0d..473d0ac943f25f 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -5,10 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements some functions for lowering compiler builtins.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BuildBuiltins.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -68,6 +64,7 @@ constexpr bool holds_alternative_if_exists(const Variant &v) {
}
}
+/// Common code for emitting an atomic builtin (load, store, cmpxchg).
class AtomicEmitter {
public:
AtomicEmitter(
@@ -869,9 +866,7 @@ class AtomicCompareExchangeEmitter final : public AtomicEmitter {
} // namespace
Error llvm::emitAtomicLoadBuiltin(
- Value *AtomicPtr, Value *RetPtr,
- // std::variant<Value *, bool> IsWeak,
- bool IsVolatile,
+ Value *AtomicPtr, Value *RetPtr, bool IsVolatile,
std::variant<Value *, AtomicOrdering, AtomicOrderingCABI> Memorder,
std::variant<Value *, SyncScope::ID, StringRef> Scope, Type *DataTy,
std::optional<uint64_t> DataSize, std::optional<uint64_t> AvailableSize,
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 58002b841824d4..82bdc719071d99 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -174,7 +174,9 @@ static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
}
}
-static Value *followStoreLoad(Instruction *I, Value *V) {
+/// For a chain of loads and stores inside a BB, searches for the first value
+/// that \p V is equal to.
+static Value *followStoreLoadInBB(Instruction *I, Value *V) {
while (true) {
Value *Addr;
while (true) {
@@ -183,7 +185,7 @@ static Value *followStoreLoad(Instruction *I, Value *V) {
return V;
if (!isa<LoadInst>(I))
continue;
- auto LoadI = cast<LoadInst>(I);
+ auto *LoadI = cast<LoadInst>(I);
if (LoadI != V)
continue;
Addr = LoadI->getPointerOperand();
@@ -197,7 +199,7 @@ static Value *followStoreLoad(Instruction *I, Value *V) {
return V;
if (!isa<StoreInst>(I))
continue;
- auto StoreI = cast<StoreInst>(I);
+ auto *StoreI = cast<StoreInst>(I);
if (StoreI->getPointerOperand() != Addr)
continue;
V = StoreI->getValueOperand();
@@ -206,36 +208,47 @@ static Value *followStoreLoad(Instruction *I, Value *V) {
}
}
+/// Searches for the last occurrence of an instruction of type \p T in \p BB.
+template <typename T> static T *findLastInstInBB(BasicBlock *BB) {
+ for (Instruction &Cur : reverse(*BB)) {
+ if (T *Candidate = dyn_cast<T>(&Cur))
+ return Candidate;
+ }
+ return nullptr;
+}
+
+/// Collects and returns all llvm::Value's that might be the same as \p Val
+/// without taking control flow into account. Backtracks through loads and
+/// stores of allocas, but not PHIs (yet).
static SetVector<Value *> storedValues(Value *Val) {
SetVector<Value *> Vals;
if (!isa<LoadInst>(Val))
return Vals;
- auto LD = cast<LoadInst>(Val);
+ LoadInst *LD = cast<LoadInst>(Val);
DenseSet<Instruction *> Visited;
SmallVector<Value *> Addrs;
Addrs.push_back(LD->getPointerOperand());
-
while (!Addrs.empty()) {
- auto Addr = Addrs.pop_back_val();
- auto AddrI = dyn_cast<Instruction>(Addr);
+ Value *Addr = Addrs.pop_back_val();
+ auto *AddrI = dyn_cast<Instruction>(Addr);
if (!AddrI)
continue;
if (Visited.contains(AddrI))
continue;
Visited.insert(AddrI);
- for (auto &&U : AddrI->uses()) {
- if (auto S = dyn_cast<StoreInst>(U.getUser())) {
+ for (Use &U : AddrI->uses()) {
+ if (auto *S = dyn_cast<StoreInst>(U.getUser())) {
assert(S->getPointerOperand() == AddrI);
- auto V = S->getValueOperand();
- if (auto ML = dyn_cast<LoadInst>(V))
+ Value *V = S->getValueOperand();
+ if (auto *ML = dyn_cast<LoadInst>(V))
Addrs.push_back(ML->getPointerOperand());
- else
- Vals.insert(V);
- } else if (auto L = dyn_cast<LoadInst>(U.getUser())) {
+ Vals.insert(V);
+ } else if (auto *L = dyn_cast<LoadInst>(U.getUser())) {
Addrs.push_back(L->getPointerOperand());
+ Vals.insert(L);
}
}
}
@@ -243,26 +256,6 @@ static SetVector<Value *> storedValues(Value *Val) {
return Vals;
}
-static Value *followStorePtr(Value *Val) {
- Value *V = Val;
- if (!isa<LoadInst>(Val))
- return V;
- auto LD = cast<LoadInst>(Val);
- auto Alloca = dyn_cast<AllocaInst>(LD->getPointerOperand());
- if (!Alloca)
- return V;
-
- auto STUse = [](Instruction *Addr) -> StoreInst * {
- for (auto &&U : Addr->uses())
- if (auto ST = dyn_cast<StoreInst>(U.getUser()))
- if (ST->getPointerOperand() == Addr &&
- !isa<LoadInst>(ST->getValueOperand()))
- return ST;
- return nullptr;
- }(Alloca);
- return STUse;
-}
-
static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
StoreInst *StoreofAtomic = nullptr;
for (Instruction &Cur : *EntryBB) {
@@ -276,14 +269,6 @@ static StoreInst *findAtomicInst(BasicBlock *EntryBB, Value *XVal) {
return StoreofAtomic;
}
-template <typename T> static T *findLastInstInBB(BasicBlock *BB) {
- for (Instruction &Cur : reverse(*BB)) {
- if (T *Candidate = dyn_cast<T>(&Cur))
- return Candidate;
- }
- return nullptr;
-}
-
class OpenMPIRBuilderTest : public testing::Test {
protected:
void SetUp() override {
@@ -3877,18 +3862,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
EXPECT_TRUE((bool)AfterReadIP);
Builder.restoreIP(*AfterReadIP);
- // IntegerType *IntCastTy = IntegerType::get(M->getContext(),
- // Float32->getScalarSizeInBits());
-
LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
EXPECT_TRUE(AtomicLoad->isAtomic());
EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
- // BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
- // EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
- // EXPECT_EQ(CastToFlt->getDestTy(), Float32);
- // EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
-
StoreInst *StoreofAtomic = cast<StoreInst>(AtomicLoad->getNextNode());
EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
@@ -3973,15 +3950,11 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
EXPECT_TRUE((bool)AfterWriteIP);
Builder.restoreIP(*AfterWriteIP);
- // IntegerType *IntCastTy = IntegerType::get(M->getContext(),
- // Float32->getScalarSizeInBits());
-
- // Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
-
StoreInst *StoreofAtomic =
findAtomicInst(OMPBuilder.getInsertionPoint().getBlock(), XVal);
- EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()),
- ValToWrite);
+ EXPECT_EQ(
+ followStoreLoadInBB(StoreofAtomic, StoreofAtomic->getValueOperand()),
+ ValToWrite);
EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
EXPECT_TRUE(StoreofAtomic->isAtomic());
@@ -4019,8 +3992,9 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
EXPECT_NE(StoreofAtomic, nullptr);
EXPECT_TRUE(StoreofAtomic->isAtomic());
- EXPECT_EQ(followStoreLoad(StoreofAtomic, StoreofAtomic->getValueOperand()),
- ValToWrite);
+ EXPECT_EQ(
+ followStoreLoadInBB(StoreofAtomic, StoreofAtomic->getValueOperand()),
+ ValToWrite);
Builder.CreateRetVoid();
OMPBuilder.finalize();
@@ -4038,7 +4012,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
IntegerType *Int32 = Type::getInt32Ty(M->getContext());
AllocaInst *XVal = Builder.CreateAlloca(Int32);
XVal->setName("AtomicVar");
- auto ExpectedVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0U);
+ ConstantInt *ExpectedVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0U);
Builder.CreateStore(ExpectedVal, XVal);
OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
@@ -4069,12 +4043,6 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- // PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- // EXPECT_NE(Phi, nullptr);
- // EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- // EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- // EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
-
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
@@ -4110,7 +4078,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
Type *FloatTy = Type::getFloatTy(M->getContext());
AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
XVal->setName("AtomicVar");
- auto ExpectedVal = ConstantFP::get(Type::getFloatTy(Ctx), 0.0);
+ ConstantFP *ExpectedVal = ConstantFP::get(Type::getFloatTy(Ctx), 0.0);
Builder.CreateStore(ExpectedVal, XVal);
OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
@@ -4140,12 +4108,6 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- // PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- // EXPECT_NE(Phi, nullptr);
- // EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- // EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- // EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
-
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
@@ -4180,7 +4142,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
Type *IntTy = Type::getInt32Ty(M->getContext());
AllocaInst *XVal = Builder.CreateAlloca(IntTy);
XVal->setName("AtomicVar");
- auto ExpectedVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ ConstantInt *ExpectedVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
Builder.CreateStore(ExpectedVal, XVal);
OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
AtomicOrdering AO = AtomicOrdering::Monotonic;
@@ -4210,12 +4172,6 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
EXPECT_NE(EndBB, nullptr);
- // PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
- // EXPECT_NE(Phi, nullptr);
- // EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
- // EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
- // EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
-
EXPECT_EQ(Sub->getNumUses(), 1U);
StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
More information about the flang-commits
mailing list