[clang] [llvm] [LoongArch] Support sc.q instruction for 128bit cmpxchg operation (PR #116771)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 27 19:22:14 PST 2024
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/116771
>From ee422d26ad2695d34b0bf471f6d4fa2c3bef8ca8 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 19 Nov 2024 17:43:31 +0800
Subject: [PATCH 1/2] [LoongArch] Support sc.q instruction for 128bit cmpxchg
operation
---
clang/include/clang/Driver/Options.td | 4 +
clang/lib/Basic/Targets/LoongArch.cpp | 7 +-
clang/lib/Basic/Targets/LoongArch.h | 2 +
.../lib/Driver/ToolChains/Arch/LoongArch.cpp | 8 +
clang/test/Driver/loongarch-march.c | 8 +-
clang/test/Driver/loongarch-mscq.c | 30 ++
clang/test/Preprocessor/init-loongarch.c | 31 +-
.../TargetParser/LoongArchTargetParser.def | 3 +-
.../llvm/TargetParser/LoongArchTargetParser.h | 3 +
llvm/lib/Target/LoongArch/LoongArch.td | 9 +-
.../LoongArchExpandAtomicPseudoInsts.cpp | 107 +++++++
.../LoongArch/LoongArchISelLowering.cpp | 46 +++
.../Target/LoongArch/LoongArchInstrInfo.td | 14 +
.../TargetParser/LoongArchTargetParser.cpp | 1 +
.../ir-instruction/atomic-cmpxchg-128.ll | 287 ++++++++++++++++++
15 files changed, 542 insertions(+), 18 deletions(-)
create mode 100644 clang/test/Driver/loongarch-mscq.c
create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 808f089914c9bb..96cef360bd5251 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5425,6 +5425,10 @@ def mdiv32 : Flag<["-"], "mdiv32">, Group<m_loongarch_Features_Group>,
HelpText<"Use div.w[u] and mod.w[u] instructions with input not sign-extended.">;
def mno_div32 : Flag<["-"], "mno-div32">, Group<m_loongarch_Features_Group>,
HelpText<"Do not use div.w[u] and mod.w[u] instructions with input not sign-extended.">;
+def mscq : Flag<["-"], "mscq">, Group<m_loongarch_Features_Group>,
+ HelpText<"Enable sc.q instruction.">;
+def mno_scq : Flag<["-"], "mno-scq">, Group<m_loongarch_Features_Group>,
+ HelpText<"Disable sc.q instruction.">;
def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>,
HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">;
def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>,
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index d36186aa9c2fbf..bb0d0b68cfcb0a 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -206,7 +206,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
// arch feature set will be used to include all sub-features belonging to
// the V1.1 ISA version.
if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLAMCAS &&
- HasFeatureLD_SEQ_SA && HasFeatureDiv32)
+ HasFeatureLD_SEQ_SA && HasFeatureDiv32 && HasFeatureSCQ)
Builder.defineMacro("__loongarch_arch",
Twine('"') + "la64v1.1" + Twine('"'));
else
@@ -249,6 +249,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasFeatureDiv32)
Builder.defineMacro("__loongarch_div32", Twine(1));
+ if (HasFeatureSCQ)
+ Builder.defineMacro("__loongarch_scq", Twine(1));
+
StringRef ABI = getABI();
if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s")
Builder.defineMacro("__loongarch_lp64");
@@ -333,6 +336,8 @@ bool LoongArchTargetInfo::handleTargetFeatures(
HasFeatureLD_SEQ_SA = true;
else if (Feature == "+div32")
HasFeatureDiv32 = true;
+ else if (Feature == "+scq")
+ HasFeatureSCQ = true;
}
return true;
}
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index abaa05aa42d438..5c34c84ff8d3e8 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -34,6 +34,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
bool HasFeatureLAMCAS;
bool HasFeatureLD_SEQ_SA;
bool HasFeatureDiv32;
+ bool HasFeatureSCQ;
public:
LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
@@ -47,6 +48,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
HasFeatureLAMCAS = false;
HasFeatureLD_SEQ_SA = false;
HasFeatureDiv32 = false;
+ HasFeatureSCQ = false;
LongDoubleWidth = 128;
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad();
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
index bbd9397aa2378a..4dd07f25bab0fb 100644
--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -301,6 +301,14 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
else
Features.push_back("-div32");
}
+
+ // Select scq feature determined by -m[no-]scq.
+ if (const Arg *A = Args.getLastArg(options::OPT_mscq, options::OPT_mno_scq)) {
+ if (A->getOption().matches(options::OPT_mscq))
+ Features.push_back("+scq");
+ else
+ Features.push_back("-scq");
+ }
}
std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c
index cfcfa852efea58..b52cdb330716ff 100644
--- a/clang/test/Driver/loongarch-march.c
+++ b/clang/test/Driver/loongarch-march.c
@@ -39,21 +39,21 @@
// CC1-LA64V1P1: "-target-cpu" "loongarch64"
// CC1-LA64V1P1-NOT: "-target-feature"
-// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32"
+// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" "-target-feature" "+scq"
// CC1-LA64V1P1-NOT: "-target-feature"
// CC1-LA64V1P1: "-target-abi" "lp64d"
// CC1-LA664: "-target-cpu" "la664"
// CC1-LA664-NOT: "-target-feature"
-// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32"
+// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" "-target-feature" "+scq"
// CC1-LA664-NOT: "-target-feature"
// CC1-LA664: "-target-abi" "lp64d"
// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual"
// IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual"
// IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual"
-// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+div32,+frecipe,+lam-bh,+lamcas,+ld-seq-sa,+lsx,+ual"
-// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+div32,+f,+frecipe,+lam-bh,+lamcas,+lasx,+ld-seq-sa,+lsx,+ual"
+// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+div32,+frecipe,+lam-bh,+lamcas,+ld-seq-sa,+lsx,+scq,+ual"
+// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+div32,+f,+frecipe,+lam-bh,+lamcas,+lasx,+ld-seq-sa,+lsx,+scq,+ual"
int foo(void) {
return 3;
diff --git a/clang/test/Driver/loongarch-mscq.c b/clang/test/Driver/loongarch-mscq.c
new file mode 100644
index 00000000000000..cd798ba5d8ff2b
--- /dev/null
+++ b/clang/test/Driver/loongarch-mscq.c
@@ -0,0 +1,30 @@
+/// Test -m[no-]scq options.
+
+// RUN: %clang --target=loongarch64 -mscq -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-SCQ
+// RUN: %clang --target=loongarch64 -mno-scq -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-SCQ
+// RUN: %clang --target=loongarch64 -mno-scq -mscq -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-SCQ
+// RUN: %clang --target=loongarch64 -mscq -mno-scq -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-SCQ
+
+// RUN: %clang --target=loongarch64 -mscq -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-SCQ
+// RUN: %clang --target=loongarch64 -mno-scq -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-SCQ
+// RUN: %clang --target=loongarch64 -mno-scq -mscq -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-SCQ
+// RUN: %clang --target=loongarch64 -mscq -mno-scq -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-SCQ
+
+
+// CC1-SCQ: "-target-feature" "+scq"
+// CC1-NO-SCQ: "-target-feature" "-scq"
+
+// IR-SCQ: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+scq{{(,.*)?}}"
+// IR-NO-SCQ: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-scq{{(,.*)?}}"
+
+int foo(void) {
+ return 42;
+}
\ No newline at end of file
diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c
index 19458a2b14f40c..f6fd603dc39c0b 100644
--- a/clang/test/Preprocessor/init-loongarch.c
+++ b/clang/test/Preprocessor/init-loongarch.c
@@ -798,7 +798,7 @@
// LA64-FPU0-LP64S-NOT: #define __loongarch_single_float
// LA64-FPU0-LP64S: #define __loongarch_soft_float 1
-/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_lamcas/_ld_seq_sa/_div32}.
+/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_lamcas/_ld_seq_sa/_div32/_scq}.
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
@@ -823,11 +823,11 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.1 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.1 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \
@@ -835,7 +835,7 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \
@@ -843,7 +843,7 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lamcas | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lamcas | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lamcas | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lamcas | \
@@ -851,7 +851,7 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +ld-seq-sa | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -ld-seq-sa | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +ld-seq-sa | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +ld-seq-sa | \
@@ -859,21 +859,29 @@
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +div32 | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -div32| \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +div32 | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +div32 | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang -target-feature -Xclang +lamcas -Xclang -target-feature -Xclang +ld-seq-sa -Xclang -target-feature -Xclang +div32 | \
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +scq | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -scq | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +scq | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,SCQ -DARCH=loongarch64 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +scq | \
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang -target-feature -Xclang +lamcas -Xclang -target-feature -Xclang +ld-seq-sa -Xclang -target-feature -Xclang +div32 -Xclang -target-feature -Xclang +scq | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.1 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=la664 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la664 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \
-// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=loongarch64 %s
+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la664 -DTUNE=loongarch64 %s
// ARCH-TUNE: #define __loongarch_arch "[[ARCH]]"
// DIV32: #define __loongarch_div32 1
@@ -881,6 +889,7 @@
// LAM-BH: #define __loongarch_lam_bh 1
// LAMCAS: #define __loongarch_lamcas 1
// LD-SEQ-SA: #define __loongarch_ld_seq_sa 1
+// SCQ: #define __loongarch_scq 1
// ARCH-TUNE: #define __loongarch_tune "[[TUNE]]"
// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
index 6731a2c975cd54..1bcf65b37f201e 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
@@ -15,6 +15,7 @@ LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH)
LOONGARCH_FEATURE("+lamcas", FK_LAMCAS)
LOONGARCH_FEATURE("+ld-seq-sa", FK_LD_SEQ_SA)
LOONGARCH_FEATURE("+div32", FK_DIV32)
+LOONGARCH_FEATURE("+scq", FK_SCQ)
#undef LOONGARCH_FEATURE
@@ -24,6 +25,6 @@ LOONGARCH_FEATURE("+div32", FK_DIV32)
LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
-LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LAMCAS | FK_LD_SEQ_SA | FK_DIV32)
+LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LAMCAS | FK_LD_SEQ_SA | FK_DIV32 | FK_SCQ)
#undef LOONGARCH_ARCH
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
index 52cd51f43ad640..e08e7bc182e112 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -63,6 +63,9 @@ enum FeatureKind : uint32_t {
// Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended.
FK_DIV32 = 1 << 13,
+
+ // sc.q is available.
+ FK_SCQ = 1 << 14,
};
struct FeatureInfo {
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 596c8c90c0a1f6..5fd52babfc6ec3 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -135,6 +135,12 @@ def FeatureDiv32
"Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended">;
def HasDiv32 : Predicate<"Subtarget->hasDiv32()">;
+// Support SC.Q instruction
+def FeatureSCQ
+ : SubtargetFeature<"scq", "HasSCQ", "true",
+ "Support sc.q instruction">;
+def HasSCQ : Predicate<"Subtarget->hasSCQ()">;
+
def TunePreferWInst
: SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
"Prefer instructions with W suffix">;
@@ -180,7 +186,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit,
FeatureLAM_BH,
FeatureLAMCAS,
FeatureLD_SEQ_SA,
- FeatureDiv32]>;
+ FeatureDiv32,
+ FeatureSCQ]>;
//===----------------------------------------------------------------------===//
// Define the LoongArch target.
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 35f84425cb0eba..4a5475bafd475d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -58,6 +58,9 @@ class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicCmpXchg128(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator,
+ MachineBasicBlock::iterator &NextMBBI);
};
char LoongArchExpandAtomicPseudo::ID = 0;
@@ -131,6 +134,9 @@ bool LoongArchExpandAtomicPseudo::expandMI(
return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
case LoongArch::PseudoCmpXchg64:
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
+ case LoongArch::PseudoCmpXchg128:
+ case LoongArch::PseudoCmpXchg128Acquire:
+ return expandAtomicCmpXchg128(MBB, MBBI, NextMBBI);
case LoongArch::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
case LoongArch::PseudoMaskedAtomicLoadMax32:
@@ -604,6 +610,107 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
return true;
}
+bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), TailMBB);
+ MF->insert(++TailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopHeadMBB->addSuccessor(TailMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ TailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ Register DestLoReg = MI.getOperand(0).getReg();
+ Register DestHiReg = MI.getOperand(1).getReg();
+ Register ScratchReg = MI.getOperand(2).getReg();
+ Register AddrReg = MI.getOperand(3).getReg();
+ Register CmpValLoReg = MI.getOperand(4).getReg();
+ Register CmpValHiReg = MI.getOperand(5).getReg();
+ Register NewValLoReg = MI.getOperand(6).getReg();
+ Register NewValHiReg = MI.getOperand(7).getReg();
+
+ // .loophead:
+ // ll.d dest_lo, (addr)
+ // ld.d dest_hi, (addr), 8
+ // bne dest_lo, cmpval_lo, tail
+ // bne dest_hi, cmpval_hi, tail
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_D), DestLoReg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LD_D), DestHiReg)
+ .addReg(AddrReg)
+ .addImm(8);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
+ .addReg(DestLoReg)
+ .addReg(CmpValLoReg)
+ .addMBB(TailMBB);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
+ .addReg(DestHiReg)
+ .addReg(CmpValHiReg)
+ .addMBB(TailMBB);
+ // .looptail:
+ // move scratch, newval_lo
+ // sc.q scratch, newval_hi, (addr)
+ // beqz scratch, loophead
+ // b done
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+ .addReg(NewValLoReg)
+ .addReg(LoongArch::R0);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_Q), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(NewValHiReg)
+ .addReg(AddrReg);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+ .addReg(ScratchReg)
+ .addMBB(LoopHeadMBB);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
+ int hint;
+
+ switch (MI.getOpcode()) {
+ case LoongArch::PseudoCmpXchg128Acquire:
+ // acquire acqrel seqcst
+ hint = 0b10100;
+ break;
+ case LoongArch::PseudoCmpXchg128:
+ hint = 0x700;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+
+ // .tail:
+ // dbar 0x700 | acquire
+ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *TailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
} // end namespace
INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 16bceacfaa222c..15a9dd892b1ca5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -376,6 +376,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
// cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
if (Subtarget.hasLAMCAS())
setMinCmpXchgSizeInBits(8);
+
+ if (Subtarget.hasSCQ()) {
+ setMaxAtomicSizeInBitsSupported(128);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ }
}
bool LoongArchTargetLowering::isOffsetFoldingLegal(
@@ -2825,6 +2830,43 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
}
}
+static void replaceCMP_XCHG_128Results(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ assert(N->getValueType(0) == MVT::i128 &&
+ "AtomicCmpSwap on types less than 128 should be legal");
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+
+ unsigned Opcode;
+ switch (MemOp->getMergedOrdering()) {
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ Opcode = LoongArch::PseudoCmpXchg128Acquire;
+ break;
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Release:
+ Opcode = LoongArch::PseudoCmpXchg128;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+
+ SDLoc DL(N);
+ auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
+ auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
+ SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
+ NewVal.first, NewVal.second, N->getOperand(0)};
+
+ SDNode *CmpSwap = DAG.getMachineNode(
+ Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
+ Ops);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
+ SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
+ Results.push_back(SDValue(CmpSwap, 3));
+}
+
void LoongArchTargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
@@ -3135,6 +3177,10 @@ void LoongArchTargetLowering::ReplaceNodeResults(
Results.push_back(Result);
break;
}
+ case ISD::ATOMIC_CMP_SWAP: {
+ replaceCMP_XCHG_128Results(N, Results, DAG);
+ break;
+ }
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 6134daf2fbe630..e7bd8c9a375a2e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -2024,6 +2024,20 @@ class PseudoCmpXchg
def PseudoCmpXchg32 : PseudoCmpXchg;
def PseudoCmpXchg64 : PseudoCmpXchg;
+class PseudoCmpXchg128Pat
+ : Pseudo<(outs GPR:$res_lo, GPR:$res_hi, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval_lo, GPR:$cmpval_hi,
+ GPR:$newval_lo, GPR:$newval_hi)> {
+ let Constraints = "@earlyclobber $res_lo,@earlyclobber $res_hi,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+ let Size = 36;
+}
+
+def PseudoCmpXchg128 : PseudoCmpXchg128Pat;
+def PseudoCmpXchg128Acquire : PseudoCmpXchg128Pat;
+
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
index c8a07c32247cdf..e394c0c15b207c 100644
--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp
+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
@@ -56,6 +56,7 @@ bool LoongArch::getArchFeatures(StringRef Arch,
Features.push_back("+lamcas");
Features.push_back("+ld-seq-sa");
Features.push_back("+div32");
+ Features.push_back("+scq");
}
return true;
}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll
new file mode 100644
index 00000000000000..11b2622a94551a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll
@@ -0,0 +1,287 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-scq < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq < %s | FileCheck %s --check-prefix=LA64-SCQ
+
+define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_acquire_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: move $a6, $a4
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: ori $a4, $zero, 2
+; LA64-NEXT: ori $a5, $zero, 2
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a6
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_acquire_acquire:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB0_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB0_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB0_1
+; LA64-SCQ-NEXT: b .LBB0_4
+; LA64-SCQ-NEXT: .LBB0_3:
+; LA64-SCQ-NEXT: dbar 20
+; LA64-SCQ-NEXT: .LBB0_4:
+; LA64-SCQ-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_acquire_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: ori $a4, $zero, 2
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a5
+; LA64-NEXT: move $a5, $zero
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_acquire_monotonic:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB1_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB1_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB1_1
+; LA64-SCQ-NEXT: b .LBB1_4
+; LA64-SCQ-NEXT: .LBB1_3:
+; LA64-SCQ-NEXT: dbar 20
+; LA64-SCQ-NEXT: .LBB1_4:
+; LA64-SCQ-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire monotonic
+ ret void
+}
+
+define i128 @cmpxchg_i128_acquire_acquire_reti128(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_acquire_acquire_reti128:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: move $a6, $a4
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: ori $a4, $zero, 2
+; LA64-NEXT: ori $a5, $zero, 2
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a6
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $a1, $sp, 8
+; LA64-NEXT: ld.d $a0, $sp, 0
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_acquire_acquire_reti128:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB2_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB2_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB2_1
+; LA64-SCQ-NEXT: b .LBB2_4
+; LA64-SCQ-NEXT: .LBB2_3:
+; LA64-SCQ-NEXT: dbar 20
+; LA64-SCQ-NEXT: .LBB2_4:
+; LA64-SCQ-NEXT: move $a0, $a5
+; LA64-SCQ-NEXT: move $a1, $a6
+; LA64-SCQ-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire
+ %res = extractvalue { i128, i1 } %tmp, 0
+ ret i128 %res
+}
+
+define i1 @cmpxchg_i128_acquire_acquire_reti1(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_acquire_acquire_reti1:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: move $a6, $a4
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: ori $a4, $zero, 2
+; LA64-NEXT: ori $a5, $zero, 2
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a6
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_acquire_acquire_reti1:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB3_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB3_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB3_1
+; LA64-SCQ-NEXT: b .LBB3_4
+; LA64-SCQ-NEXT: .LBB3_3:
+; LA64-SCQ-NEXT: dbar 20
+; LA64-SCQ-NEXT: .LBB3_4:
+; LA64-SCQ-NEXT: xor $a0, $a6, $a2
+; LA64-SCQ-NEXT: xor $a1, $a5, $a1
+; LA64-SCQ-NEXT: or $a0, $a1, $a0
+; LA64-SCQ-NEXT: sltui $a0, $a0, 1
+; LA64-SCQ-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire
+ %res = extractvalue { i128, i1 } %tmp, 1
+ ret i1 %res
+}
+
+
+define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_monotonic_monotonic:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a4
+; LA64-NEXT: move $a4, $zero
+; LA64-NEXT: move $a5, $zero
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB4_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB4_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB4_1
+; LA64-SCQ-NEXT: b .LBB4_4
+; LA64-SCQ-NEXT: .LBB4_3:
+; LA64-SCQ-NEXT: dbar 1792
+; LA64-SCQ-NEXT: .LBB4_4:
+; LA64-SCQ-NEXT: ret
+ %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
+ ret void
+}
+
+define i128 @cmpxchg_i128_monotonic_monotonic_reti128(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_monotonic_monotonic_reti128:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a4
+; LA64-NEXT: move $a4, $zero
+; LA64-NEXT: move $a5, $zero
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $a1, $sp, 8
+; LA64-NEXT: ld.d $a0, $sp, 0
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti128:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB5_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB5_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB5_1
+; LA64-SCQ-NEXT: b .LBB5_4
+; LA64-SCQ-NEXT: .LBB5_3:
+; LA64-SCQ-NEXT: dbar 1792
+; LA64-SCQ-NEXT: .LBB5_4:
+; LA64-SCQ-NEXT: move $a0, $a5
+; LA64-SCQ-NEXT: move $a1, $a6
+; LA64-SCQ-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
+ %res = extractvalue { i128, i1 } %tmp, 0
+ ret i128 %res
+}
+
+define i1 @cmpxchg_i128_monotonic_monotonic_reti1(ptr %ptr, i128 %cmp, i128 %val) nounwind {
+; LA64-LABEL: cmpxchg_i128_monotonic_monotonic_reti1:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $a2, $sp, 8
+; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: addi.d $a1, $sp, 0
+; LA64-NEXT: move $a2, $a3
+; LA64-NEXT: move $a3, $a4
+; LA64-NEXT: move $a4, $zero
+; LA64-NEXT: move $a5, $zero
+; LA64-NEXT: bl %plt(__atomic_compare_exchange_16)
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+;
+; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti1:
+; LA64-SCQ: # %bb.0:
+; LA64-SCQ-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
+; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
+; LA64-SCQ-NEXT: bne $a5, $a1, .LBB6_3
+; LA64-SCQ-NEXT: bne $a6, $a2, .LBB6_3
+; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; LA64-SCQ-NEXT: move $a7, $a3
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
+; LA64-SCQ-NEXT: beqz $a7, .LBB6_1
+; LA64-SCQ-NEXT: b .LBB6_4
+; LA64-SCQ-NEXT: .LBB6_3:
+; LA64-SCQ-NEXT: dbar 1792
+; LA64-SCQ-NEXT: .LBB6_4:
+; LA64-SCQ-NEXT: xor $a0, $a6, $a2
+; LA64-SCQ-NEXT: xor $a1, $a5, $a1
+; LA64-SCQ-NEXT: or $a0, $a1, $a0
+; LA64-SCQ-NEXT: sltui $a0, $a0, 1
+; LA64-SCQ-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
+ %res = extractvalue { i128, i1 } %tmp, 1
+ ret i1 %res
+}
>From 7eb12cdc99f2af168a62d106875a81709bd60d6d Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Thu, 28 Nov 2024 11:19:17 +0800
Subject: [PATCH 2/2] Make scq work with ld-seq-sa
---
.../LoongArchExpandAtomicPseudoInsts.cpp | 3 +-
.../ir-instruction/atomic-cmpxchg-128.ll | 167 ++++++++++++------
2 files changed, 113 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 4a5475bafd475d..f46360faf97a67 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -697,7 +697,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128(
// .tail:
// dbar 0x700 | acquire
- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
+ if (!(hint == 0x700 && MF->getSubtarget<LoongArchSubtarget>().hasLD_SEQ_SA()))
+ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
NextMBBI = MBB.end();
MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll
index 11b2622a94551a..d0187760dba3bd 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch64 -mattr=+d,-scq < %s | FileCheck %s --check-prefix=LA64
-; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq < %s | FileCheck %s --check-prefix=LA64-SCQ
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-scq,-ld-seq-sa < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq,-ld-seq-sa < %s | FileCheck %s --check-prefixes=LA64-SCQ,NO-LD-SEQ-SA
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq,+ld-seq-sa < %s | FileCheck %s --check-prefixes=LA64-SCQ,LD-SEQ-SA
define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind {
; LA64-LABEL: cmpxchg_i128_acquire_acquire:
@@ -182,22 +183,38 @@ define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) no
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
;
-; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic:
-; LA64-SCQ: # %bb.0:
-; LA64-SCQ-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
-; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
-; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
-; LA64-SCQ-NEXT: bne $a5, $a1, .LBB4_3
-; LA64-SCQ-NEXT: bne $a6, $a2, .LBB4_3
-; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
-; LA64-SCQ-NEXT: move $a7, $a3
-; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
-; LA64-SCQ-NEXT: beqz $a7, .LBB4_1
-; LA64-SCQ-NEXT: b .LBB4_4
-; LA64-SCQ-NEXT: .LBB4_3:
-; LA64-SCQ-NEXT: dbar 1792
-; LA64-SCQ-NEXT: .LBB4_4:
-; LA64-SCQ-NEXT: ret
+; NO-LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic:
+; NO-LD-SEQ-SA: # %bb.0:
+; NO-LD-SEQ-SA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0
+; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8
+; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB4_3
+; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB4_3
+; NO-LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; NO-LD-SEQ-SA-NEXT: move $a7, $a3
+; NO-LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0
+; NO-LD-SEQ-SA-NEXT: beqz $a7, .LBB4_1
+; NO-LD-SEQ-SA-NEXT: b .LBB4_4
+; NO-LD-SEQ-SA-NEXT: .LBB4_3:
+; NO-LD-SEQ-SA-NEXT: dbar 1792
+; NO-LD-SEQ-SA-NEXT: .LBB4_4:
+; NO-LD-SEQ-SA-NEXT: ret
+;
+; LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic:
+; LD-SEQ-SA: # %bb.0:
+; LD-SEQ-SA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0
+; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8
+; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB4_3
+; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB4_3
+; LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LD-SEQ-SA-NEXT: move $a7, $a3
+; LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0
+; LD-SEQ-SA-NEXT: beqz $a7, .LBB4_1
+; LD-SEQ-SA-NEXT: b .LBB4_4
+; LD-SEQ-SA-NEXT: .LBB4_3:
+; LD-SEQ-SA-NEXT: .LBB4_4:
+; LD-SEQ-SA-NEXT: ret
%res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
ret void
}
@@ -221,24 +238,42 @@ define i128 @cmpxchg_i128_monotonic_monotonic_reti128(ptr %ptr, i128 %cmp, i128
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
;
-; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti128:
-; LA64-SCQ: # %bb.0:
-; LA64-SCQ-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
-; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
-; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
-; LA64-SCQ-NEXT: bne $a5, $a1, .LBB5_3
-; LA64-SCQ-NEXT: bne $a6, $a2, .LBB5_3
-; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
-; LA64-SCQ-NEXT: move $a7, $a3
-; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
-; LA64-SCQ-NEXT: beqz $a7, .LBB5_1
-; LA64-SCQ-NEXT: b .LBB5_4
-; LA64-SCQ-NEXT: .LBB5_3:
-; LA64-SCQ-NEXT: dbar 1792
-; LA64-SCQ-NEXT: .LBB5_4:
-; LA64-SCQ-NEXT: move $a0, $a5
-; LA64-SCQ-NEXT: move $a1, $a6
-; LA64-SCQ-NEXT: ret
+; NO-LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti128:
+; NO-LD-SEQ-SA: # %bb.0:
+; NO-LD-SEQ-SA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0
+; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8
+; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB5_3
+; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB5_3
+; NO-LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; NO-LD-SEQ-SA-NEXT: move $a7, $a3
+; NO-LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0
+; NO-LD-SEQ-SA-NEXT: beqz $a7, .LBB5_1
+; NO-LD-SEQ-SA-NEXT: b .LBB5_4
+; NO-LD-SEQ-SA-NEXT: .LBB5_3:
+; NO-LD-SEQ-SA-NEXT: dbar 1792
+; NO-LD-SEQ-SA-NEXT: .LBB5_4:
+; NO-LD-SEQ-SA-NEXT: move $a0, $a5
+; NO-LD-SEQ-SA-NEXT: move $a1, $a6
+; NO-LD-SEQ-SA-NEXT: ret
+;
+; LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti128:
+; LD-SEQ-SA: # %bb.0:
+; LD-SEQ-SA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0
+; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8
+; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB5_3
+; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB5_3
+; LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LD-SEQ-SA-NEXT: move $a7, $a3
+; LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0
+; LD-SEQ-SA-NEXT: beqz $a7, .LBB5_1
+; LD-SEQ-SA-NEXT: b .LBB5_4
+; LD-SEQ-SA-NEXT: .LBB5_3:
+; LD-SEQ-SA-NEXT: .LBB5_4:
+; LD-SEQ-SA-NEXT: move $a0, $a5
+; LD-SEQ-SA-NEXT: move $a1, $a6
+; LD-SEQ-SA-NEXT: ret
%tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
%res = extractvalue { i128, i1 } %tmp, 0
ret i128 %res
@@ -261,26 +296,46 @@ define i1 @cmpxchg_i128_monotonic_monotonic_reti1(ptr %ptr, i128 %cmp, i128 %val
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
;
-; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti1:
-; LA64-SCQ: # %bb.0:
-; LA64-SCQ-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
-; LA64-SCQ-NEXT: ll.d $a5, $a0, 0
-; LA64-SCQ-NEXT: ld.d $a6, $a0, 8
-; LA64-SCQ-NEXT: bne $a5, $a1, .LBB6_3
-; LA64-SCQ-NEXT: bne $a6, $a2, .LBB6_3
-; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
-; LA64-SCQ-NEXT: move $a7, $a3
-; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0
-; LA64-SCQ-NEXT: beqz $a7, .LBB6_1
-; LA64-SCQ-NEXT: b .LBB6_4
-; LA64-SCQ-NEXT: .LBB6_3:
-; LA64-SCQ-NEXT: dbar 1792
-; LA64-SCQ-NEXT: .LBB6_4:
-; LA64-SCQ-NEXT: xor $a0, $a6, $a2
-; LA64-SCQ-NEXT: xor $a1, $a5, $a1
-; LA64-SCQ-NEXT: or $a0, $a1, $a0
-; LA64-SCQ-NEXT: sltui $a0, $a0, 1
-; LA64-SCQ-NEXT: ret
+; NO-LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti1:
+; NO-LD-SEQ-SA: # %bb.0:
+; NO-LD-SEQ-SA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0
+; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8
+; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB6_3
+; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB6_3
+; NO-LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; NO-LD-SEQ-SA-NEXT: move $a7, $a3
+; NO-LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0
+; NO-LD-SEQ-SA-NEXT: beqz $a7, .LBB6_1
+; NO-LD-SEQ-SA-NEXT: b .LBB6_4
+; NO-LD-SEQ-SA-NEXT: .LBB6_3:
+; NO-LD-SEQ-SA-NEXT: dbar 1792
+; NO-LD-SEQ-SA-NEXT: .LBB6_4:
+; NO-LD-SEQ-SA-NEXT: xor $a0, $a6, $a2
+; NO-LD-SEQ-SA-NEXT: xor $a1, $a5, $a1
+; NO-LD-SEQ-SA-NEXT: or $a0, $a1, $a0
+; NO-LD-SEQ-SA-NEXT: sltui $a0, $a0, 1
+; NO-LD-SEQ-SA-NEXT: ret
+;
+; LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti1:
+; LD-SEQ-SA: # %bb.0:
+; LD-SEQ-SA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0
+; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8
+; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB6_3
+; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB6_3
+; LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; LD-SEQ-SA-NEXT: move $a7, $a3
+; LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0
+; LD-SEQ-SA-NEXT: beqz $a7, .LBB6_1
+; LD-SEQ-SA-NEXT: b .LBB6_4
+; LD-SEQ-SA-NEXT: .LBB6_3:
+; LD-SEQ-SA-NEXT: .LBB6_4:
+; LD-SEQ-SA-NEXT: xor $a0, $a6, $a2
+; LD-SEQ-SA-NEXT: xor $a1, $a5, $a1
+; LD-SEQ-SA-NEXT: or $a0, $a1, $a0
+; LD-SEQ-SA-NEXT: sltui $a0, $a0, 1
+; LD-SEQ-SA-NEXT: ret
%tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic
%res = extractvalue { i128, i1 } %tmp, 1
ret i1 %res
More information about the llvm-commits
mailing list