[clang] [llvm] [LoongArch] Support amcas[_db].{b/h/w/d} instructions. (PR #114189)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 17 21:01:17 PST 2024
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/114189
>From 481211fa747bd10df6ca4a51fe3e50fc335d89ef Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 23 Oct 2024 17:22:25 +0800
Subject: [PATCH 1/2] Support amcas[_db].{b/h/w/d} instructions.
---
clang/include/clang/Driver/Options.td | 4 +
clang/lib/Basic/Targets/LoongArch.cpp | 7 +-
clang/lib/Basic/Targets/LoongArch.h | 2 +
.../lib/Driver/ToolChains/Arch/LoongArch.cpp | 9 +
clang/test/Driver/loongarch-march.c | 8 +-
clang/test/Driver/loongarch-mlamcas.c | 30 +
clang/test/Preprocessor/init-loongarch.c | 25 +-
.../TargetParser/LoongArchTargetParser.def | 3 +-
.../llvm/TargetParser/LoongArchTargetParser.h | 4 +
.../AsmParser/LoongArchAsmParser.cpp | 4 +-
llvm/lib/Target/LoongArch/LoongArch.td | 9 +-
.../LoongArch/LoongArchISelLowering.cpp | 16 +-
.../Target/LoongArch/LoongArchInstrInfo.td | 44 +-
.../TargetParser/LoongArchTargetParser.cpp | 1 +
.../ir-instruction/atomic-cmpxchg.ll | 175 +-
.../ir-instruction/atomicrmw-lamcas.ll | 5341 +++++++++++++++++
16 files changed, 5654 insertions(+), 28 deletions(-)
create mode 100644 clang/test/Driver/loongarch-mlamcas.c
create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9d595984b63c4b..9a69251dbcef42 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5414,6 +5414,10 @@ def mlam_bh : Flag<["-"], "mlam-bh">, Group<m_loongarch_Features_Group>,
HelpText<"Enable amswap[_db].{b/h} and amadd[_db].{b/h}">;
def mno_lam_bh : Flag<["-"], "mno-lam-bh">, Group<m_loongarch_Features_Group>,
HelpText<"Disable amswap[_db].{b/h} and amadd[_db].{b/h}">;
+def mlamcas : Flag<["-"], "mlamcas">, Group<m_loongarch_Features_Group>,
+ HelpText<"Enable amcas[_db].{b/h/w/d}">;
+def mno_lamcas : Flag<["-"], "mno-lamcas">, Group<m_loongarch_Features_Group>,
+ HelpText<"Disable amcas[_db].{b/h/w/d}">;
def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>,
HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">;
def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>,
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index 07b22b35f603ce..e08b7a3d96f18b 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -205,7 +205,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
// TODO: As more features of the V1.1 ISA are supported, a unified "v1.1"
// arch feature set will be used to include all sub-features belonging to
// the V1.1 ISA version.
- if (HasFeatureFrecipe && HasFeatureLAM_BH)
+ if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLAMCAS)
Builder.defineMacro("__loongarch_arch",
Twine('"') + "la64v1.1" + Twine('"'));
else
@@ -239,6 +239,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasFeatureLAM_BH)
Builder.defineMacro("__loongarch_lam_bh", Twine(1));
+ if (HasFeatureLAMCAS)
+ Builder.defineMacro("__loongarch_lamcas", Twine(1));
+
StringRef ABI = getABI();
if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s")
Builder.defineMacro("__loongarch_lp64");
@@ -317,6 +320,8 @@ bool LoongArchTargetInfo::handleTargetFeatures(
HasFeatureFrecipe = true;
else if (Feature == "+lam-bh")
HasFeatureLAM_BH = true;
+ else if (Feature == "+lamcas")
+ HasFeatureLAMCAS = true;
}
return true;
}
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 3585e9f7968b4b..824234ae858608 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -31,6 +31,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
bool HasFeatureLASX;
bool HasFeatureFrecipe;
bool HasFeatureLAM_BH;
+ bool HasFeatureLAMCAS;
public:
LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
@@ -41,6 +42,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
HasFeatureLASX = false;
HasFeatureFrecipe = false;
HasFeatureLAM_BH = false;
+ HasFeatureLAMCAS = false;
LongDoubleWidth = 128;
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad();
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
index e69a5562137ccd..86124fa38d9100 100644
--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -269,6 +269,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
else
Features.push_back("-lam-bh");
}
+
+ // Select lamcas feature determined by -m[no-]lamcas.
+ if (const Arg *A =
+ Args.getLastArg(options::OPT_mlamcas, options::OPT_mno_lamcas)) {
+ if (A->getOption().matches(options::OPT_mlamcas))
+ Features.push_back("+lamcas");
+ else
+ Features.push_back("-lamcas");
+ }
}
std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c
index d4cd5b07ae905f..439cb34cc9e979 100644
--- a/clang/test/Driver/loongarch-march.c
+++ b/clang/test/Driver/loongarch-march.c
@@ -39,21 +39,21 @@
// CC1-LA64V1P1: "-target-cpu" "loongarch64"
// CC1-LA64V1P1-NOT: "-target-feature"
-// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh"
+// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas"
// CC1-LA64V1P1-NOT: "-target-feature"
// CC1-LA64V1P1: "-target-abi" "lp64d"
// CC1-LA664: "-target-cpu" "la664"
// CC1-LA664-NOT: "-target-feature"
-// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh"
+// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas"
// CC1-LA664-NOT: "-target-feature"
// CC1-LA664: "-target-abi" "lp64d"
// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual"
// IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual"
// IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual"
-// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lam-bh,+lsx,+ual"
-// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lam-bh,+lasx,+lsx,+ual"
+// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lam-bh,+lamcas,+lsx,+ual"
+// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lam-bh,+lamcas,+lasx,+lsx,+ual"
int foo(void) {
return 3;
diff --git a/clang/test/Driver/loongarch-mlamcas.c b/clang/test/Driver/loongarch-mlamcas.c
new file mode 100644
index 00000000000000..2185a1a8115d66
--- /dev/null
+++ b/clang/test/Driver/loongarch-mlamcas.c
@@ -0,0 +1,30 @@
+/// Test -m[no-]lamcas options.
+
+// RUN: %clang --target=loongarch64 -mlamcas -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-LAMCAS
+// RUN: %clang --target=loongarch64 -mno-lamcas -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-LAMCAS
+// RUN: %clang --target=loongarch64 -mno-lamcas -mlamcas -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-LAMCAS
+// RUN: %clang --target=loongarch64 -mlamcas -mno-lamcas -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-LAMCAS
+
+// RUN: %clang --target=loongarch64 -mlamcas -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-LAMCAS
+// RUN: %clang --target=loongarch64 -mno-lamcas -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-LAMCAS
+// RUN: %clang --target=loongarch64 -mno-lamcas -mlamcas -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-LAMCAS
+// RUN: %clang --target=loongarch64 -mlamcas -mno-lamcas -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-LAMCAS
+
+
+// CC1-LAMCAS: "-target-feature" "+lamcas"
+// CC1-NO-LAMCAS: "-target-feature" "-lamcas"
+
+// IR-LAMCAS: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lamcas{{(,.*)?}}"
+// IR-NO-LAMCAS: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lamcas{{(,.*)?}}"
+
+int foo(void) {
+ return 42;
+}
diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c
index 8019292e0f10e0..6686ef3ada100a 100644
--- a/clang/test/Preprocessor/init-loongarch.c
+++ b/clang/test/Preprocessor/init-loongarch.c
@@ -798,7 +798,7 @@
// LA64-FPU0-LP64S-NOT: #define __loongarch_single_float
// LA64-FPU0-LP64S: #define __loongarch_soft_float 1
-/// Check __loongarch_arch{_tune/_frecipe/_lam_bh}.
+/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_lamcas}.
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
@@ -823,11 +823,11 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la64v1.1 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS -DARCH=la64v1.1 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LAMCAS -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \
@@ -835,25 +835,34 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAMCAS -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh | \
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lamcas | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lamcas | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lamcas | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=loongarch64 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lamcas | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang -target-feature -Xclang +lamcas | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.1 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la664 -DTUNE=la664 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS -DARCH=la664 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la664 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS -DARCH=la664 -DTUNE=loongarch64 %s
// ARCH-TUNE: #define __loongarch_arch "[[ARCH]]"
// FRECIPE: #define __loongarch_frecipe 1
// LAM-BH: #define __loongarch_lam_bh 1
+// LAMCAS: #define __loongarch_lamcas 1
// ARCH-TUNE: #define __loongarch_tune "[[TUNE]]"
// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
index 6cd2018b7b59cb..c0198c2842cad6 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
@@ -12,6 +12,7 @@ LOONGARCH_FEATURE("+lvz", FK_LVZ)
LOONGARCH_FEATURE("+ual", FK_UAL)
LOONGARCH_FEATURE("+frecipe", FK_FRECIPE)
LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH)
+LOONGARCH_FEATURE("+lamcas", FK_LAMCAS)
#undef LOONGARCH_FEATURE
@@ -21,6 +22,6 @@ LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH)
LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
-LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH)
+LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LAMCAS)
#undef LOONGARCH_ARCH
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
index b5be03b1b67fbb..9838a2679424a9 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -53,6 +53,10 @@ enum FeatureKind : uint32_t {
// Atomic memory swap and add instructions for byte and half word are
// available.
FK_LAM_BH = 1 << 10,
+
+ // Atomic memory compare and swap instructions for byte, half word, word and
+ // double word are available.
+ FK_LAMCAS = 1 << 11,
};
struct FeatureInfo {
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index b4b19caed8999e..0bf0510456f82a 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -1562,7 +1562,9 @@ unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
unsigned Opc = Inst.getOpcode();
switch (Opc) {
default:
- if (Opc >= LoongArch::AMADD_D && Opc <= LoongArch::AMXOR_W) {
+ // amcas[_db].{b/h/w/d} do not need this check.
+ if ((Opc >= LoongArch::AMADD_B && Opc <= LoongArch::AMAND__DB_W) ||
+ (Opc >= LoongArch::AMMAX_D && Opc <= LoongArch::AMXOR__DB_W)) {
MCRegister Rd = Inst.getOperand(0).getReg();
MCRegister Rk = Inst.getOperand(1).getReg();
MCRegister Rj = Inst.getOperand(2).getReg();
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 54ebf86666abf9..c528597e2d7774 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -118,6 +118,12 @@ def FeatureLAM_BH
"Support amswap[_db].{b/h} and amadd[_db].{b/h} instructions.">;
def HasLAM_BH : Predicate<"Subtarget->hasLAM_BH()">;
+// Atomic memory compare and swap instructions for byte, half word, word and double word
+def FeatureLAMCAS
+ : SubtargetFeature<"lamcas", "HasLAMCAS", "true",
+ "Support amcas[_db].{b/h/w/d}.">;
+def HasLAMCAS : Predicate<"Subtarget->hasLAMCAS()">;
+
def TunePreferWInst
: SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
"Prefer instructions with W suffix">;
@@ -158,7 +164,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit,
FeatureExtLVZ,
FeatureExtLBT,
FeatureFrecipe,
- FeatureLAM_BH]>;
+ FeatureLAM_BH,
+ FeatureLAMCAS]>;
//===----------------------------------------------------------------------===//
// Define the LoongArch target.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e2c644a56c95b0..cf35fc1c992576 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -363,6 +363,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
+
+ // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
+ if (Subtarget.hasLAMCAS())
+ setMinCmpXchgSizeInBits(8);
}
bool LoongArchTargetLowering::isOffsetFoldingLegal(
@@ -5743,6 +5747,10 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ if (Subtarget.hasLAMCAS() &&
+ (AI->getOperation() == AtomicRMWInst::Nand || Size < 32))
+ return AtomicExpansionKind::CmpXChg;
+
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
@@ -5797,6 +5805,10 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *CI) const {
+
+ if (Subtarget.hasLAMCAS())
+ return AtomicExpansionKind::None;
+
unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
@@ -6292,8 +6304,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
}
ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
- // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
- return ISD::SIGN_EXTEND;
+ // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
+ return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 671b8cc6ffe1b1..a0f2829a619cbd 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -710,10 +710,16 @@ class STORE_2RI14<bits<32> op>
"$rd, $rj, $imm14">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
-let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "@earlyclobber $rd" in
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "@earlyclobber $rd" in {
class AM_3R<bits<32> op>
: Fmt3R<op, (outs GPR:$rd), (ins GPR:$rk, GPRMemAtomic:$rj),
"$rd, $rk, $rj">;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $dst" in
+class AMCAS_3R<bits<32> op>
+ : Fmt3R<op, (outs GPR:$dst), (ins GPR:$rd, GPR:$rk, GPRMemAtomic:$rj),
+ "$rd, $rk, $rj">;
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
class LLBase<bits<32> op>
@@ -1024,14 +1030,14 @@ def AMMAX__DB_WU : AM_3R<0x38700000>;
def AMMAX__DB_DU : AM_3R<0x38708000>;
def AMMIN__DB_WU : AM_3R<0x38710000>;
def AMMIN__DB_DU : AM_3R<0x38718000>;
-def AMCAS_B : AM_3R<0x38580000>;
-def AMCAS_H : AM_3R<0x38588000>;
-def AMCAS_W : AM_3R<0x38590000>;
-def AMCAS_D : AM_3R<0x38598000>;
-def AMCAS__DB_B : AM_3R<0x385a0000>;
-def AMCAS__DB_H : AM_3R<0x385a8000>;
-def AMCAS__DB_W : AM_3R<0x385b0000>;
-def AMCAS__DB_D : AM_3R<0x385b8000>;
+def AMCAS_B : AMCAS_3R<0x38580000>;
+def AMCAS_H : AMCAS_3R<0x38588000>;
+def AMCAS_W : AMCAS_3R<0x38590000>;
+def AMCAS_D : AMCAS_3R<0x38598000>;
+def AMCAS__DB_B : AMCAS_3R<0x385a0000>;
+def AMCAS__DB_H : AMCAS_3R<0x385a8000>;
+def AMCAS__DB_W : AMCAS_3R<0x385b0000>;
+def AMCAS__DB_D : AMCAS_3R<0x385b8000>;
def LL_D : LLBase<0x22000000>;
def SC_D : SCBase<0x23000000>;
def SC_Q : SCBase_128<0x38570000>;
@@ -2106,6 +2112,26 @@ def : Pat<(atomic_load_sub_i16 GPR:$rj, GPR:$rk),
(AMADD__DB_H (SUB_W R0, GPR:$rk), GPR:$rj)>;
} // Predicates = [ IsLA64, HasLAM_BH ]
+let Predicates = [ HasLAMCAS, IsLA64 ] in {
+
+def : Pat<(atomic_cmp_swap_i8_monotonic GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS_B GPR:$cmp, GPR:$new, GPR:$addr)>;
+def : Pat<(atomic_cmp_swap_i16_monotonic GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS_H GPR:$cmp, GPR:$new, GPR:$addr)>;
+def : Pat<(atomic_cmp_swap_i32_monotonic GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS_W GPR:$cmp, GPR:$new, GPR:$addr)>;
+def : Pat<(atomic_cmp_swap_i64_monotonic GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS_D GPR:$cmp, GPR:$new, GPR:$addr)>;
+
+def : Pat<(atomic_cmp_swap_i8 GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS__DB_B GPR:$cmp, GPR:$new, GPR:$addr)>;
+def : Pat<(atomic_cmp_swap_i16 GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS__DB_H GPR:$cmp, GPR:$new, GPR:$addr)>;
+def : Pat<(atomic_cmp_swap_i32 GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS__DB_W GPR:$cmp, GPR:$new, GPR:$addr)>;
+def : Pat<(atomic_cmp_swap_i64 GPR:$addr, GPR:$cmp, GPR:$new),
+ (AMCAS__DB_D GPR:$cmp, GPR:$new, GPR:$addr)>;
+}
let Predicates = [IsLA64] in {
diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
index 27e3b5683c5a6e..891174a4afcef0 100644
--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp
+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
@@ -53,6 +53,7 @@ bool LoongArch::getArchFeatures(StringRef Arch,
if (Arch == "la64v1.1") {
Features.push_back("+frecipe");
Features.push_back("+lam-bh");
+ Features.push_back("+lamcas");
}
return true;
}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index ad98397dfe8f02..8e6ce480e5a901 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-lamcas < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+lamcas < %s | FileCheck %s --check-prefix=LA64-LAMCAS
define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; LA64-LABEL: cmpxchg_i8_acquire_acquire:
@@ -26,6 +27,11 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB0_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_acquire_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
ret void
}
@@ -56,6 +62,11 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB1_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_acquire_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
ret void
}
@@ -76,6 +87,11 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB2_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_acquire_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
ret void
}
@@ -95,6 +111,11 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB3_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_acquire_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.d $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
ret void
}
@@ -124,6 +145,11 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB4_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_acquire_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
ret void
}
@@ -154,6 +180,11 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB5_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_acquire_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
ret void
}
@@ -174,6 +205,11 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB6_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_acquire_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
ret void
}
@@ -193,6 +229,11 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB7_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_acquire_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.d $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
ret void
}
@@ -223,6 +264,12 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind
; LA64-NEXT: .LBB8_4:
; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_acquire_acquire_reti8:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
%res = extractvalue { i8, i1 } %tmp, 0
ret i8 %res
@@ -255,6 +302,12 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
; LA64-NEXT: .LBB9_4:
; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_acquire_acquire_reti16:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
%res = extractvalue { i16, i1 } %tmp, 0
ret i16 %res
@@ -277,6 +330,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou
; LA64-NEXT: .LBB10_4:
; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_acquire_acquire_reti32:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
%res = extractvalue { i32, i1 } %tmp, 0
ret i32 %res
@@ -298,6 +357,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou
; LA64-NEXT: .LBB11_4:
; LA64-NEXT: move $a0, $a3
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_acquire_acquire_reti64:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas_db.d $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
%res = extractvalue { i64, i1 } %tmp, 0
ret i64 %res
@@ -331,6 +396,14 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind
; LA64-NEXT: xor $a0, $a1, $a0
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_acquire_acquire_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: amcas_db.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a1, $a3
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
%res = extractvalue { i8, i1 } %tmp, 1
ret i1 %res
@@ -365,6 +438,14 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
; LA64-NEXT: xor $a0, $a1, $a0
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_acquire_acquire_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: amcas_db.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a1, $a3
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
%res = extractvalue { i16, i1 } %tmp, 1
ret i1 %res
@@ -388,6 +469,14 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw
; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_acquire_acquire_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: addi.w $a3, $a1, 0
+; LA64-LAMCAS-NEXT: amcas_db.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a1, $a3
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
%res = extractvalue { i32, i1 } %tmp, 1
ret i1 %res
@@ -410,6 +499,14 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw
; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_acquire_acquire_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a3, $a1
+; LA64-LAMCAS-NEXT: amcas_db.d $a3, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a3, $a1
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
%res = extractvalue { i64, i1 } %tmp, 1
ret i1 %res
@@ -440,6 +537,11 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB16_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_monotonic_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
ret void
}
@@ -470,6 +572,11 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB17_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_monotonic_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
ret void
}
@@ -490,6 +597,11 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB18_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_monotonic_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
ret void
}
@@ -509,6 +621,11 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB19_4:
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_monotonic_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.d $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
ret void
}
@@ -539,6 +656,12 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun
; LA64-NEXT: .LBB20_4:
; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_monotonic_monotonic_reti8:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
%res = extractvalue { i8, i1 } %tmp, 0
ret i8 %res
@@ -571,6 +694,12 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val)
; LA64-NEXT: .LBB21_4:
; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_monotonic_monotonic_reti16:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
%res = extractvalue { i16, i1 } %tmp, 0
ret i16 %res
@@ -593,6 +722,12 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val)
; LA64-NEXT: .LBB22_4:
; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_monotonic_monotonic_reti32:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
%res = extractvalue { i32, i1 } %tmp, 0
ret i32 %res
@@ -614,6 +749,12 @@ define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val)
; LA64-NEXT: .LBB23_4:
; LA64-NEXT: move $a0, $a3
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_monotonic_monotonic_reti64:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: amcas.d $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: move $a0, $a1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
%res = extractvalue { i64, i1 } %tmp, 0
ret i64 %res
@@ -647,6 +788,14 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun
; LA64-NEXT: xor $a0, $a1, $a0
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i8_monotonic_monotonic_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: amcas.b $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a1, $a3
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
%res = extractvalue { i8, i1 } %tmp, 1
ret i1 %res
@@ -681,6 +830,14 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n
; LA64-NEXT: xor $a0, $a1, $a0
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i16_monotonic_monotonic_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: amcas.h $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a1, $a3
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
%res = extractvalue { i16, i1 } %tmp, 1
ret i1 %res
@@ -704,6 +861,14 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n
; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i32_monotonic_monotonic_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: addi.w $a3, $a1, 0
+; LA64-LAMCAS-NEXT: amcas.w $a1, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a1, $a3
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
%res = extractvalue { i32, i1 } %tmp, 1
ret i1 %res
@@ -726,6 +891,14 @@ define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) n
; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: cmpxchg_i64_monotonic_monotonic_reti1:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a3, $a1
+; LA64-LAMCAS-NEXT: amcas.d $a3, $a2, $a0
+; LA64-LAMCAS-NEXT: xor $a0, $a3, $a1
+; LA64-LAMCAS-NEXT: sltui $a0, $a0, 1
+; LA64-LAMCAS-NEXT: ret
%tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
%res = extractvalue { i64, i1 } %tmp, 1
ret i1 %res
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll
new file mode 100644
index 00000000000000..2f677038e1db37
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll
@@ -0,0 +1,5341 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-lamcas < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+lamcas < %s | FileCheck %s --check-prefix=LA64-LAMCAS
+
+define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB0_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.bu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB0_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.b $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB0_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB1_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB1_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 0 acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB2_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB2_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 -1 acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB3_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.hu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB3_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.h $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB3_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB4_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB4_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 0 acquire
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB5_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB5_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 -1 acquire
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB6_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB6_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB6_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB7_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB7_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB7_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i16 %b acquire
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB8_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB8_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB8_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB9_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB9_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB9_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i16 %b acquire
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB10_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB10_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB10_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB10_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB11_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB11_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB11_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB11_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB12_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB12_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB12_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB12_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB13_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB13_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB13_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB13_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB14_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB14_3: # in Loop: Header=BB14_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB14_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB14_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB14_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB15_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB15_3: # in Loop: Header=BB15_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB15_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB15_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB15_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB16_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB16_3: # in Loop: Header=BB16_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB16_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB16_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB16_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB17_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB17_3: # in Loop: Header=BB17_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB17_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB17_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB17_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB18_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB18_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB18_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB19_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB19_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB19_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i16 %b acquire
+ ret i16 %1
+
+}
+
+define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.w $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB20_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i32_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.w $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB20_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.w $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB20_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.d $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB21_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i64_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.d $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB21_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.d $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB21_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i64 %b acquire
+ ret i64 %1
+}
+
+define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB22_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB22_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB23_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB23_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i16 %b acquire
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB24_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB24_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB25_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB25_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i16 %b acquire
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB26_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB26_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_acquire:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB27_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB27_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i16 %b acquire
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB28_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.bu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB28_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.b $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB28_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB29_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB29_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 0 release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB30_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB30_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 -1 release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB31_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.hu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB31_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.h $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB31_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB32_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB32_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 0 release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB33_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB33_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 -1 release
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB34_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB34_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB34_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB35_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB35_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB35_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i16 %b release
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB36_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB36_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB36_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB37_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB37_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB37_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i16 %b release
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB38_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB38_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB38_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB38_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB39_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB39_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB39_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB39_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB40_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB40_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB40_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB40_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB41_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB41_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB41_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB41_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB42_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB42_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB42_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB42_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB43_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB43_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB43_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB43_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB44_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB44_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB44_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB44_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB45_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB45_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB45_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB45_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB46_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB46_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB46_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB47_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB47_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB47_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i16 %b release
+ ret i16 %1
+
+}
+
+define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i32_release:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.w $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB48_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i32_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.w $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB48_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.w $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB48_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i64_release:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.d $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB49_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i64_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.d $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB49_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.d $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB49_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB50_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB50_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB51_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB51_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i16 %b release
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB52_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB52_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB53_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB53_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i16 %b release
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i8_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB54_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB54_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i16_release:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_release:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB55_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB55_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i16 %b release
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB56_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.bu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB56_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.b $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB56_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB57_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB57_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB58_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB58_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB59_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.hu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB59_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.h $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB59_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB60_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB60_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB61_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB61_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB62_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB62_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB62_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB63_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB63_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB63_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i16 %b acq_rel
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB64_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB64_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB64_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB65_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB65_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB65_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i16 %b acq_rel
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB66_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB66_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB66_3: # in Loop: Header=BB66_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB66_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB66_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB66_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB67_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB67_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB67_3: # in Loop: Header=BB67_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB67_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB67_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB67_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB68_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB68_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB68_3: # in Loop: Header=BB68_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB68_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB68_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB68_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB69_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB69_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB69_3: # in Loop: Header=BB69_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB69_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB69_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB69_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB70_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB70_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB70_3: # in Loop: Header=BB70_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB70_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB70_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB70_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB71_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB71_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB71_3: # in Loop: Header=BB71_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB71_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB71_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB71_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB72_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB72_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB72_3: # in Loop: Header=BB72_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB72_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB72_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB72_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB73_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB73_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB73_3: # in Loop: Header=BB73_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB73_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB73_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB73_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB74_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB74_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB74_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB75_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB75_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB75_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i16 %b acq_rel
+ ret i16 %1
+
+}
+
+define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i32_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.w $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB76_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.w $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB76_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.w $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB76_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i64_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.d $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB77_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i64_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.d $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB77_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.d $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB77_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+
+
+define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB78_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB78_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB79_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB79_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i16 %b acq_rel
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB80_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB80_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB81_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB81_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i16 %b acq_rel
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i8_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB82_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB82_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i16_acq_rel:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB83_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB83_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i16 %b acq_rel
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB84_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.bu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB84_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.b $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB84_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB85_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB85_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB86_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB86_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB87_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.hu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB87_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a2
+; LA64-LAMCAS-NEXT: amcas_db.h $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB87_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB88_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a2, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB88_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor_db.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB89_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB89_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB90_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB90_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB90_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB91_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB91_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB91_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i16 %b seq_cst
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB92_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB92_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB92_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB93_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB93_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB93_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i16 %b seq_cst
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB94_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB94_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB94_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB94_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB94_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB95_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB95_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB95_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB95_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB96_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB96_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB96_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB96_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB97_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB97_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB97_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB97_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB98_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB98_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB98_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB98_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB99_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB99_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB99_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB99_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB100_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB100_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB100_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB100_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB101_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB101_3: # in Loop: Header=BB101_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB101_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB101_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB101_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB102_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB102_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB102_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB103_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB103_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB103_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i16 %b seq_cst
+ ret i16 %1
+
+}
+
+define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i32_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.w $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB104_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.w $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB104_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.w $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB104_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i64_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.d $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB105_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i64_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.d $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB105_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas_db.d $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB105_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+
+
+define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB106_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB106_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB107_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB107_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i16 %b seq_cst
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB108_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB108_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB109_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB109_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i16 %b seq_cst
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i8_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB110_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB110_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i16_seq_cst:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB111_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB111_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i16 %b seq_cst
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB112_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.bu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB112_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a2
+; LA64-LAMCAS-NEXT: amcas.b $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB112_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB113_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a2, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB113_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 0 monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a2, $zero, 255
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB114_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB114_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i8 -1 monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB115_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: ld.hu $a2, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB115_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a2
+; LA64-LAMCAS-NEXT: amcas.h $a2, $a1, $a0
+; LA64-LAMCAS-NEXT: bne $a2, $a3, .LBB115_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: move $a0, $a2
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: nor $a2, $a2, $zero
+; LA64-NEXT: amand.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB116_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a2, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $zero, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a2, .LBB116_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 0 monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: sll.w $a2, $a2, $a1
+; LA64-NEXT: amor.w $a3, $a2, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a1
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a1, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: addi.w $a2, $zero, -1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB117_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a2, $a1
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB117_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xchg ptr %a, i16 -1 monotonic
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB118_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB118_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB118_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_add_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: add.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB119_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_add_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB119_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: add.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB119_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw add ptr %a, i16 %b monotonic
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB120_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB120_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB120_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_sub_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB121_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_sub_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB121_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: sub.d $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB121_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i16 %b monotonic
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB122_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB122_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB122_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB122_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB123_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB123_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umax_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB123_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB123_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB124_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB124_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: andi $a3, $a1, 255
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB124_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: andi $a4, $a0, 255
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.b $a5, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB124_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB125_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB125_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_umin_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a3, $a1, 15, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB125_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: bstrpick.d $a4, $a0, 15, 0
+; LA64-LAMCAS-NEXT: sltu $a4, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a4, $a4, 1
+; LA64-LAMCAS-NEXT: masknez $a5, $a1, $a4
+; LA64-LAMCAS-NEXT: maskeqz $a4, $a0, $a4
+; LA64-LAMCAS-NEXT: or $a4, $a4, $a5
+; LA64-LAMCAS-NEXT: ext.w.h $a5, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a5, .LBB125_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB126_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB126_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB126_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB126_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB127_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB127_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_max_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB127_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB127_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: xori $a3, $a3, 56
+; LA64-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB128_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB128_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.b $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB128_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB128_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a3, $a2, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: ori $a5, $zero, 48
+; LA64-NEXT: sub.d $a3, $a5, $a3
+; LA64-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB129_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB129_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_min_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: ext.w.h $a3, $a1
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB129_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: slt $a5, $a3, $a4
+; LA64-LAMCAS-NEXT: xori $a5, $a5, 1
+; LA64-LAMCAS-NEXT: masknez $a6, $a1, $a5
+; LA64-LAMCAS-NEXT: maskeqz $a5, $a0, $a5
+; LA64-LAMCAS-NEXT: or $a5, $a5, $a6
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a5, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB129_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+
+
+define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB130_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB130_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB130_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a1
+; LA64-NEXT: nor $a5, $a5, $zero
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB131_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB131_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a3, $a3, $zero
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB131_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i16 %b monotonic
+ ret i16 %1
+
+}
+
+define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i32_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.w $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB132_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i32_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.w $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB132_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas.w $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB132_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_nand_i64_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: and $a3, $a2, $a1
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: sc.d $a3, $a0, 0
+; LA64-NEXT: beqz $a3, .LBB133_1
+; LA64-NEXT: # %bb.2:
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_nand_i64_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.d $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB133_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: move $a3, $a0
+; LA64-LAMCAS-NEXT: and $a4, $a0, $a1
+; LA64-LAMCAS-NEXT: nor $a4, $a4, $zero
+; LA64-LAMCAS-NEXT: amcas.d $a0, $a4, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a3, .LBB133_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw nand ptr %a, i64 %b monotonic
+ ret i64 %1
+}
+
+
+
+define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB134_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB134_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_and_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: orn $a1, $a1, $a3
+; LA64-NEXT: amand.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_and_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB135_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB135_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw and ptr %a, i16 %b monotonic
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB136_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB136_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_or_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amor.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_or_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB137_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB137_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw or ptr %a, i16 %b monotonic
+ ret i16 %1
+
+}
+
+define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i8_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB138_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB138_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xor_i16_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: amxor.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
+; LA64-NEXT: ret
+;
+; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_monotonic:
+; LA64-LAMCAS: # %bb.0:
+; LA64-LAMCAS-NEXT: move $a2, $a0
+; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
+; LA64-LAMCAS-NEXT: .p2align 4, , 16
+; LA64-LAMCAS-NEXT: .LBB139_1: # %atomicrmw.start
+; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
+; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
+; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
+; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB139_1
+; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: ret
+ %1 = atomicrmw xor ptr %a, i16 %b monotonic
+ ret i16 %1
+
+}
>From 1c7400b273c8d53451cc46e95ba3aa786135dd64 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Mon, 18 Nov 2024 12:58:44 +0800
Subject: [PATCH 2/2] implement emitExpandAtomicRMW to custom expand i8/i16
atomicrmw and/or/xor
---
.../LoongArch/LoongArchISelLowering.cpp | 63 ++-
.../Target/LoongArch/LoongArchISelLowering.h | 1 +
.../ir-instruction/atomicrmw-lamcas.ll | 517 ++++++++----------
3 files changed, 278 insertions(+), 303 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index cf35fc1c992576..38dc7ebfaa5831 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5726,6 +5726,58 @@ bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
}
+/// Expand a sub-word (i8/i16) `atomicrmw and/or/xor` into the same operation
+/// on the enclosing 4-byte-aligned 32-bit word.
+///
+/// The rewrite works as follows:
+///   - the address is masked down to the containing word;
+///   - the operand is zero-extended and shifted into the value's lane;
+///   - for `and`, every bit outside the lane is additionally set to 1 so the
+///     neighbouring bytes are preserved (for `or`/`xor` the zero bits left by
+///     the zero-extension already have that effect);
+///   - the old narrow value is recovered from the old word with a logical
+///     shift right followed by a truncation.
+///
+/// \param AI the instruction to expand. It must be an And/Or/Xor whose value
+///           type is narrower than 32 bits. All uses of \p AI are replaced by
+///           the recovered old value and \p AI is erased.
+void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
+  AtomicRMWInst::BinOp Op = AI->getOperation();
+
+  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+          Op == AtomicRMWInst::And) &&
+         "Unable to expand");
+  unsigned MinWordSize = 4;
+
+  IRBuilder<> Builder(AI);
+  LLVMContext &Ctx = Builder.getContext();
+  const DataLayout &DL = AI->getDataLayout();
+  Type *ValueType = AI->getType();
+  Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
+
+  // The operand is zero-extended to WordType and the lane mask is built by
+  // shifting by the value width, so the value has to be strictly narrower
+  // than the word.
+  uint64_t ValueBits = DL.getTypeStoreSize(ValueType) * 8;
+  assert(ValueBits < MinWordSize * 8 &&
+         "Only sub-word atomicrmw can be widened");
+
+  Value *Addr = AI->getPointerOperand();
+  PointerType *PtrTy = cast<PointerType>(Addr->getType());
+  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
+
+  // Clear the low address bits to obtain the address of the containing word.
+  Value *AlignedAddr = Builder.CreateIntrinsic(
+      Intrinsic::ptrmask, {PtrTy, IntTy},
+      {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
+      "AlignedAddr");
+
+  // Bit offset of the value inside the word: (Addr & 3) * 8.
+  Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
+  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
+  Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+  ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
+
+  // Ones over the value's lane. The constant is computed in 64 bits so the
+  // shift by ValueBits can never overflow `int`.
+  Value *Mask = Builder.CreateShl(
+      ConstantInt::get(WordType, (uint64_t(1) << ValueBits) - 1), ShiftAmt,
+      "Mask");
+  Value *InvMask = Builder.CreateNot(Mask, "Inv_Mask");
+  Value *ShiftedVal =
+      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
+                        ShiftAmt, "ValOperand_Shifted");
+
+  // `and` must keep the bits outside the lane, so they are forced to 1;
+  // `or`/`xor` leave them alone because the shifted operand is 0 there.
+  Value *NewOperand = ShiftedVal;
+  if (Op == AtomicRMWInst::And)
+    NewOperand = Builder.CreateOr(ShiftedVal, InvMask, "AndOperand");
+
+  AtomicRMWInst *NewAI =
+      Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
+                              AI->getOrdering(), AI->getSyncScopeID());
+
+  // Move the old value back down to bit 0 and narrow it to the original type.
+  Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
+  Value *OldValue = Builder.CreateTrunc(Shift, ValueType, "extracted");
+  AI->replaceAllUsesWith(OldValue);
+  AI->eraseFromParent();
+}
+
TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// TODO: Add more AtomicRMWInst that needs to be extended.
@@ -5747,9 +5799,14 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- if (Subtarget.hasLAMCAS() &&
- (AI->getOperation() == AtomicRMWInst::Nand || Size < 32))
- return AtomicExpansionKind::CmpXChg;
+ if (Subtarget.hasLAMCAS()) {
+ if ((AI->getOperation() == AtomicRMWInst::And ||
+ AI->getOperation() == AtomicRMWInst::Or ||
+ AI->getOperation() == AtomicRMWInst::Xor))
+ return AtomicExpansionKind::Expand;
+ if ((AI->getOperation() == AtomicRMWInst::Nand || Size < 32))
+ return AtomicExpansionKind::CmpXChg;
+ }
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index df6a55a2b83190..4c1431917b50fb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -191,6 +191,7 @@ class LoongArchTargetLowering : public TargetLowering {
bool hasAndNot(SDValue Y) const override;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
Value *AlignedAddr, Value *Incr,
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll
index 2f677038e1db37..cc2182f5a521a4 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lamcas.ll
@@ -2,6 +2,8 @@
; RUN: llc --mtriple=loongarch64 -mattr=+d,-lamcas < %s | FileCheck %s --check-prefix=LA64
; RUN: llc --mtriple=loongarch64 -mattr=+d,+lamcas < %s | FileCheck %s --check-prefix=LA64-LAMCAS
+; i8/i16 atomicrmw and/or/xor shouldn't use amcas[_db].b/h to expand
+
define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_xchg_i8_acquire:
; LA64: # %bb.0:
@@ -906,16 +908,15 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i8_acquire:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB22_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB22_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: ori $a3, $zero, 255
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i8 %b acquire
ret i8 %1
@@ -938,16 +939,16 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i16_acquire:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB23_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB23_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: lu12i.w $a3, 15
+; LA64-LAMCAS-NEXT: ori $a3, $a3, 4095
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i16 %b acquire
ret i16 %1
@@ -967,16 +968,12 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i8_acquire:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB24_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB24_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b acquire
ret i8 %1
@@ -995,16 +992,12 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i16_acquire:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB25_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB25_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b acquire
ret i16 %1
@@ -1024,16 +1017,12 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_acquire:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB26_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB26_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b acquire
ret i8 %1
@@ -1052,16 +1041,12 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_acquire:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB27_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB27_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b acquire
ret i16 %1
@@ -1972,16 +1957,15 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i8_release:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB50_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB50_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: ori $a3, $zero, 255
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i8 %b release
ret i8 %1
@@ -2004,16 +1988,16 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i16_release:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB51_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB51_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: lu12i.w $a3, 15
+; LA64-LAMCAS-NEXT: ori $a3, $a3, 4095
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i16 %b release
ret i16 %1
@@ -2033,16 +2017,12 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i8_release:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB52_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB52_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b release
ret i8 %1
@@ -2061,16 +2041,12 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i16_release:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB53_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB53_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b release
ret i16 %1
@@ -2090,16 +2066,12 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_release:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB54_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB54_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b release
ret i8 %1
@@ -2118,16 +2090,12 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_release:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB55_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB55_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b release
ret i16 %1
@@ -3040,16 +3008,15 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i8_acq_rel:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB78_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB78_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: ori $a3, $zero, 255
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i8 %b acq_rel
ret i8 %1
@@ -3072,16 +3039,16 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i16_acq_rel:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB79_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB79_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: lu12i.w $a3, 15
+; LA64-LAMCAS-NEXT: ori $a3, $a3, 4095
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i16 %b acq_rel
ret i16 %1
@@ -3101,16 +3068,12 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i8_acq_rel:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB80_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB80_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b acq_rel
ret i8 %1
@@ -3129,16 +3092,12 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i16_acq_rel:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB81_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB81_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b acq_rel
ret i16 %1
@@ -3158,16 +3117,12 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_acq_rel:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB82_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB82_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b acq_rel
ret i8 %1
@@ -3186,16 +3141,12 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_acq_rel:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB83_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB83_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b acq_rel
ret i16 %1
@@ -4108,16 +4059,15 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i8_seq_cst:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB106_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB106_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: ori $a3, $zero, 255
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i8 %b seq_cst
ret i8 %1
@@ -4140,16 +4090,16 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i16_seq_cst:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB107_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB107_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: lu12i.w $a3, 15
+; LA64-LAMCAS-NEXT: ori $a3, $a3, 4095
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i16 %b seq_cst
ret i16 %1
@@ -4169,16 +4119,12 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i8_seq_cst:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB108_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB108_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b seq_cst
ret i8 %1
@@ -4197,16 +4143,12 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i16_seq_cst:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB109_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB109_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b seq_cst
ret i16 %1
@@ -4226,16 +4168,12 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_seq_cst:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB110_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB110_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b seq_cst
ret i8 %1
@@ -4254,16 +4192,12 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_seq_cst:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB111_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas_db.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB111_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b seq_cst
ret i16 %1
@@ -5178,16 +5112,15 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i8_monotonic:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB134_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB134_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: ori $a3, $zero, 255
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i8 %b monotonic
ret i8 %1
@@ -5210,16 +5143,16 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_and_i16_monotonic:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB135_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: and $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB135_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: lu12i.w $a3, 15
+; LA64-LAMCAS-NEXT: ori $a3, $a3, 4095
+; LA64-LAMCAS-NEXT: sll.w $a3, $a3, $a2
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: orn $a1, $a1, $a3
+; LA64-LAMCAS-NEXT: amand.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw and ptr %a, i16 %b monotonic
ret i16 %1
@@ -5239,16 +5172,12 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i8_monotonic:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB136_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB136_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b monotonic
ret i8 %1
@@ -5267,16 +5196,12 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_or_i16_monotonic:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB137_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: or $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB137_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amor.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b monotonic
ret i16 %1
@@ -5296,16 +5221,12 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i8_monotonic:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.bu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB138_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.b $a4, $a0
-; LA64-LAMCAS-NEXT: amcas.b $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB138_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: andi $a1, $a1, 255
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b monotonic
ret i8 %1
@@ -5324,16 +5245,12 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
;
; LA64-LAMCAS-LABEL: atomicrmw_xor_i16_monotonic:
; LA64-LAMCAS: # %bb.0:
-; LA64-LAMCAS-NEXT: move $a2, $a0
-; LA64-LAMCAS-NEXT: ld.hu $a0, $a0, 0
-; LA64-LAMCAS-NEXT: .p2align 4, , 16
-; LA64-LAMCAS-NEXT: .LBB139_1: # %atomicrmw.start
-; LA64-LAMCAS-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-LAMCAS-NEXT: xor $a3, $a0, $a1
-; LA64-LAMCAS-NEXT: ext.w.h $a4, $a0
-; LA64-LAMCAS-NEXT: amcas.h $a0, $a3, $a2
-; LA64-LAMCAS-NEXT: bne $a0, $a4, .LBB139_1
-; LA64-LAMCAS-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-LAMCAS-NEXT: slli.d $a2, $a0, 3
+; LA64-LAMCAS-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-LAMCAS-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-LAMCAS-NEXT: sll.w $a1, $a1, $a2
+; LA64-LAMCAS-NEXT: amxor.w $a3, $a1, $a0
+; LA64-LAMCAS-NEXT: srl.w $a0, $a3, $a2
; LA64-LAMCAS-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b monotonic
ret i16 %1
More information about the llvm-commits
mailing list