[clang] [llvm] [LoongArch] Support LA V1.1 feature that div.w[u] and mod.w[u] instructions with inputs not signed-extended. (PR #116764)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 26 00:52:55 PST 2024


https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/116764

>From a01c311e639770fb5815c3ddfe93a215332d5181 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 19 Nov 2024 15:04:29 +0800
Subject: [PATCH 1/5] [LoongArch] Support LA V1.1 feature that div.w[u] and
 mod.w[u] instructions with inputs not signed-extended.

---
 clang/include/clang/Driver/Options.td         |  4 +++
 clang/lib/Basic/Targets/LoongArch.cpp         |  8 ++++-
 clang/lib/Basic/Targets/LoongArch.h           |  2 ++
 .../lib/Driver/ToolChains/Arch/LoongArch.cpp  |  9 ++++++
 clang/test/Driver/loongarch-march.c           |  8 ++---
 clang/test/Driver/loongarch-mdiv32.c          | 30 +++++++++++++++++++
 clang/test/Preprocessor/init-loongarch.c      | 27 +++++++++++------
 .../TargetParser/LoongArchTargetParser.def    |  3 +-
 .../llvm/TargetParser/LoongArchTargetParser.h |  2 ++
 llvm/lib/Target/LoongArch/LoongArch.td        |  9 +++++-
 .../LoongArch/LoongArchISelLowering.cpp       | 16 ++++++++--
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 ++
 .../Target/LoongArch/LoongArchInstrInfo.td    |  4 +++
 .../TargetParser/LoongArchTargetParser.cpp    |  1 +
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 24 +++++++--------
 15 files changed, 119 insertions(+), 30 deletions(-)
 create mode 100644 clang/test/Driver/loongarch-mdiv32.c

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 40fd48761928b3..7e3f6aaaa10dbe 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5417,6 +5417,10 @@ def mld_seq_sa : Flag<["-"], "mld-seq-sa">, Group<m_loongarch_Features_Group>,
   HelpText<"Do not generate load-load barrier instructions (dbar 0x700)">;
 def mno_ld_seq_sa : Flag<["-"], "mno-ld-seq-sa">, Group<m_loongarch_Features_Group>,
   HelpText<"Generate load-load barrier instructions (dbar 0x700)">;
+def mdiv32 : Flag<["-"], "mdiv32">, Group<m_loongarch_Features_Group>,
+  HelpText<"Use div.w[u] and mod.w[u] instructions with input not sign-extended.">;
+def mno_div32 : Flag<["-"], "mno-div32">, Group<m_loongarch_Features_Group>,
+  HelpText<"Do not use div.w[u] and mod.w[u] instructions with input not sign-extended.">;
 def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>,
   HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">;
 def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>,
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index 3f2d7317532aaf..9ed695358b1ca8 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -205,7 +205,8 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
       // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1"
       // arch feature set will be used to include all sub-features belonging to
       // the V1.1 ISA version.
-      if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLD_SEQ_SA)
+      if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLD_SEQ_SA &&
+          HasFeatureDiv32)
         Builder.defineMacro("__loongarch_arch",
                             Twine('"') + "la64v1.1" + Twine('"'));
       else
@@ -242,6 +243,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasFeatureLD_SEQ_SA)
     Builder.defineMacro("__loongarch_ld_seq_sa", Twine(1));
 
+  if (HasFeatureDiv32)
+    Builder.defineMacro("__loongarch_div32", Twine(1));
+
   StringRef ABI = getABI();
   if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s")
     Builder.defineMacro("__loongarch_lp64");
@@ -322,6 +326,8 @@ bool LoongArchTargetInfo::handleTargetFeatures(
       HasFeatureLAM_BH = true;
     else if (Feature == "+ld-seq-sa")
       HasFeatureLD_SEQ_SA = true;
+    else if (Feature == "+div32")
+      HasFeatureDiv32 = true;
   }
   return true;
 }
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index e5eae7a8fcf677..3002a0bbc4491f 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -32,6 +32,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
   bool HasFeatureFrecipe;
   bool HasFeatureLAM_BH;
   bool HasFeatureLD_SEQ_SA;
+  bool HasFeatureDiv32;
 
 public:
   LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
@@ -43,6 +44,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
     HasFeatureFrecipe = false;
     HasFeatureLAM_BH = false;
     HasFeatureLD_SEQ_SA = false;
+    HasFeatureDiv32 = false;
     LongDoubleWidth = 128;
     LongDoubleAlign = 128;
     LongDoubleFormat = &llvm::APFloat::IEEEquad();
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
index 67b71a3ec623e4..5be57e866d85e8 100644
--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -283,6 +283,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
     else
       Features.push_back("-ld-seq-sa");
   }
+
+  // Select div32 feature determined by -m[no-]div32.
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_mdiv32, options::OPT_mno_div32)) {
+    if (A->getOption().matches(options::OPT_mdiv32))
+      Features.push_back("+div32");
+    else
+      Features.push_back("-div32");
+  }
 }
 
 std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c
index c7091336f3bc80..981ae5c5c7dc1c 100644
--- a/clang/test/Driver/loongarch-march.c
+++ b/clang/test/Driver/loongarch-march.c
@@ -39,21 +39,21 @@
 
 // CC1-LA64V1P1: "-target-cpu" "loongarch64"
 // CC1-LA64V1P1-NOT: "-target-feature"
-// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa"
+// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32"
 // CC1-LA64V1P1-NOT: "-target-feature"
 // CC1-LA64V1P1: "-target-abi" "lp64d"
 
 // CC1-LA664: "-target-cpu" "la664"
 // CC1-LA664-NOT: "-target-feature"
-// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa"
+// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32"
 // CC1-LA664-NOT: "-target-feature"
 // CC1-LA664: "-target-abi" "lp64d"
 
 // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual"
 // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual"
 // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual"
-// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lam-bh,+ld-seq-sa,+lsx,+ual"
-// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lam-bh,+lasx,+ld-seq-sa,+lsx,+ual"
+// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+div32,+frecipe,+lam-bh,+ld-seq-sa,+lsx,+ual"
+// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+div32,+f,+frecipe,+lam-bh,+lasx,+ld-seq-sa,+lsx,+ual"
 
 int foo(void) {
   return 3;
diff --git a/clang/test/Driver/loongarch-mdiv32.c b/clang/test/Driver/loongarch-mdiv32.c
new file mode 100644
index 00000000000000..cf774b3818c55d
--- /dev/null
+++ b/clang/test/Driver/loongarch-mdiv32.c
@@ -0,0 +1,30 @@
+/// Test -m[no]div32 options.
+
+// RUN: %clang --target=loongarch64 -mdiv32 -fsyntax-only %s -### 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=CC1-DIV32
+// RUN: %clang --target=loongarch64 -mno-div32 -fsyntax-only %s -### 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=CC1-NO-DIV32
+// RUN: %clang --target=loongarch64 -mno-div32 -mdiv32 -fsyntax-only %s -### 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=CC1-DIV32
+// RUN: %clang --target=loongarch64  -mdiv32 -mno-div32 -fsyntax-only %s -### 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=CC1-NO-DIV32
+
+// RUN: %clang --target=loongarch64 -mdiv32 -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-DIV32
+// RUN: %clang --target=loongarch64 -mno-div32 -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-DIV32
+// RUN: %clang --target=loongarch64 -mno-div32 -mdiv32 -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-DIV32
+// RUN: %clang --target=loongarch64 -mdiv32 -mno-div32 -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-DIV32
+
+
+// CC1-DIV32: "-target-feature" "+div32"
+// CC1-NO-DIV32: "-target-feature" "-div32"
+
+// IR-DIV32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+div32{{(,.*)?}}"
+// IR-NO-DIV32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-div32{{(,.*)?}}"
+
+int foo(void) {
+  return 42;
+}
diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c
index 0e3320f01b328c..9045073cbb7893 100644
--- a/clang/test/Preprocessor/init-loongarch.c
+++ b/clang/test/Preprocessor/init-loongarch.c
@@ -798,7 +798,7 @@
 // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float
 // LA64-FPU0-LP64S: #define __loongarch_soft_float 1
 
-/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_ld_seq_sa}.
+/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_ld_seq_sa/_div32}.
 
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \
 // RUN:   FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
@@ -823,11 +823,11 @@
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \
 // RUN:   FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \
-// RUN:   FileCheck --match-full-lines  --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la64v1.1 -DTUNE=loongarch64 %s
+// RUN:   FileCheck --match-full-lines  --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la64v1.1 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \
-// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \
-// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=loongarch64 -DTUNE=loongarch64 %s
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \
@@ -835,7 +835,7 @@
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \
-// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \
@@ -843,23 +843,32 @@
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +ld-seq-sa | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -ld-seq-sa | \
-// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +ld-seq-sa | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=loongarch64 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +ld-seq-sa | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh  -Xclang -target-feature -Xclang +ld-seq-sa | \
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +div32 | \
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -div32| \
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +div32 | \
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +div32 | \
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s
+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh  -Xclang -target-feature -Xclang +ld-seq-sa -Xclang -target-feature -Xclang +div32 | \
 // RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.1 -DTUNE=loongarch64 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \
-// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la664 -DTUNE=la664 %s
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=la664 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \
 // RUN:   FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \
 // RUN:   FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s
 // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \
-// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA -DARCH=la664 -DTUNE=loongarch64 %s
+// RUN:   FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=loongarch64 %s
 
 // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]"
+// DIV32: #define __loongarch_div32 1
 // FRECIPE: #define __loongarch_frecipe 1
 // LAM-BH: #define __loongarch_lam_bh 1
 // LD-SEQ-SA: #define __loongarch_ld_seq_sa 1
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
index 324d5c18e6dea3..e3285f89ef9eab 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
@@ -13,6 +13,7 @@ LOONGARCH_FEATURE("+ual", FK_UAL)
 LOONGARCH_FEATURE("+frecipe", FK_FRECIPE)
 LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH)
 LOONGARCH_FEATURE("+ld-seq-sa", FK_LD_SEQ_SA)
+LOONGARCH_FEATURE("+div32", FK_DIV32)
 
 #undef LOONGARCH_FEATURE
 
@@ -22,6 +23,6 @@ LOONGARCH_FEATURE("+ld-seq-sa", FK_LD_SEQ_SA)
 
 LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
 LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
-LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LD_SEQ_SA)
+LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LD_SEQ_SA | FK_DIV32)
 
 #undef LOONGARCH_ARCH
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
index 00957b84ab576c..5862becc92d774 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -57,6 +57,8 @@ enum FeatureKind : uint32_t {
   // Do not generate load-load barrier instructions (dbar 0x700).
   FK_LD_SEQ_SA = 1 << 12,
 
+  // Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended.
+  FK_DIV32 = 1 << 13,
 };
 
 struct FeatureInfo {
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 100bdba36c440c..aa1741b0ccbaf5 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -123,6 +123,12 @@ def FeatureLD_SEQ_SA
                         "Don't use load-load barrier (dbar 0x700).">;
 def HasLD_SEQ_SA : Predicate<"Subtarget->hasLD_SEQ_SA()">;
 
+// Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended.
+def FeatureDiv32
+    : SubtargetFeature<"div32", "HasDiv32", "true",
+                        "Support div.w[u] and mod.w[u] can handle inputs that are not sign-extended">;
+def HasDiv32 : Predicate<"Subtarget->hasDiv32()">;
+
 def TunePreferWInst
     : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
                        "Prefer instructions with W suffix">;
@@ -165,7 +171,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit,
                                              FeatureExtLVZ,
                                              FeatureExtLBT,
                                              FeatureFrecipe,
-                                             FeatureLAM_BH]>;
+                                             FeatureLAM_BH,
+                                             FeatureDiv32]>;
 
 //===----------------------------------------------------------------------===//
 // Define the LoongArch target.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 1abb428175eea7..c84619622270d5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -141,7 +141,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
-    setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
+    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+                       Custom);
     setOperationAction(ISD::LROUND, MVT::i32, Custom);
   }
 
@@ -2631,8 +2632,12 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     llvm_unreachable("Unexpected opcode");
+  case ISD::SDIV:
+    return LoongArchISD::DIV_W;
   case ISD::UDIV:
     return LoongArchISD::DIV_WU;
+  case ISD::SREM:
+    return LoongArchISD::MOD_W;
   case ISD::UREM:
     return LoongArchISD::MOD_WU;
   case ISD::SHL:
@@ -2829,11 +2834,16 @@ void LoongArchTargetLowering::ReplaceNodeResults(
            "Unexpected custom legalisation");
     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
     break;
+  case ISD::SDIV:
   case ISD::UDIV:
+  case ISD::SREM:
   case ISD::UREM:
     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
-    Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
+    Results.push_back(customLegalizeToWOp(N, DAG, 2,
+                                          Subtarget.hasDiv32() && VT == MVT::i32
+                                              ? ISD::ANY_EXTEND
+                                              : ISD::SIGN_EXTEND));
     break;
   case ISD::SHL:
   case ISD::SRA:
@@ -4669,7 +4679,9 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(BITREV_W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
+    NODE_NAME_CASE(DIV_W)
     NODE_NAME_CASE(DIV_WU)
+    NODE_NAME_CASE(MOD_W)
     NODE_NAME_CASE(MOD_WU)
     NODE_NAME_CASE(CLZ_W)
     NODE_NAME_CASE(CTZ_W)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 1aa686695b49b8..5a47dfb257175f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,8 @@ enum NodeType : unsigned {
   ROTR_W,
 
   // unsigned 32-bit integer division
+  DIV_W,
+  MOD_W,
   DIV_WU,
   MOD_WU,
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 7993f4f1326937..0d290e10651c60 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -85,7 +85,9 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
 def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>;
 def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
+def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>;
 def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
 def loongarch_crc_w_b_w
     : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
@@ -1156,10 +1158,12 @@ def : PatGprGpr<sub, SUB_D>;
 def : PatGprGpr<sdiv, DIV_D>;
 def : PatGprGpr_32<sdiv, DIV_W>;
 def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr<loongarch_div_w, DIV_W>;
 def : PatGprGpr<loongarch_div_wu, DIV_WU>;
 def : PatGprGpr<srem, MOD_D>;
 def : PatGprGpr_32<srem, MOD_W>;
 def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr<loongarch_mod_w, MOD_W>;
 def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
 def : PatGprGpr<shiftop<rotr>, ROTR_D>;
 def : PatGprGpr<shiftopw<loongarch_rotr_w>, ROTR_W>;
diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
index 9b8407a73bea3f..8e7681d526cef5 100644
--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp
+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
@@ -54,6 +54,7 @@ bool LoongArch::getArchFeatures(StringRef Arch,
       Features.push_back("+frecipe");
       Features.push_back("+lam-bh");
       Features.push_back("+ld-seq-sa");
+      Features.push_back("+div32");
     }
     return true;
   }
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c5af79157eaadc..99824f6d7718e7 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -121,7 +121,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_i32:
@@ -137,7 +137,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB3_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -156,7 +156,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: sdiv_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_ui32_si32_si32:
@@ -170,7 +170,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: sdiv_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB4_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -693,7 +693,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_i32:
@@ -709,7 +709,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB19_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -728,7 +728,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: srem_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_ui32_si32_si32:
@@ -742,7 +742,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: srem_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB20_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -763,7 +763,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_si32_ui32_ui32:
@@ -779,7 +779,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB21_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -798,7 +798,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: srem_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_si32_si32_si32:
@@ -812,7 +812,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: srem_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB22_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7

>From 1d0477f71ca9ee4c7e3209f4751b47da4ac66ffd Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 20 Nov 2024 14:36:37 +0800
Subject: [PATCH 2/5] add a test

---
 .../sdiv-udiv-srem-urem-div32.ll              | 75 +++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
new file mode 100644
index 00000000000000..d22b4e9c30fd61
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-div32 < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+div32 < %s | FileCheck %s --check-prefix=LA64-DIV32
+
+define dso_local noundef signext i32 @divw(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+; LA64-LABEL: divw:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    ret
+;
+; LA64-DIV32-LABEL: divw:
+; LA64-DIV32:       # %bb.0:
+; LA64-DIV32-NEXT:    div.w $a0, $a0, $a1
+; LA64-DIV32-NEXT:    ret
+  %3 = trunc i64 %0 to i32
+  %4 = trunc i64 %1 to i32
+  %5 = sdiv i32 %3, %4
+  ret i32 %5
+}
+
+define dso_local noundef signext i32 @divwu(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+; LA64-LABEL: divwu:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.wu $a0, $a0, $a1
+; LA64-NEXT:    ret
+;
+; LA64-DIV32-LABEL: divwu:
+; LA64-DIV32:       # %bb.0:
+; LA64-DIV32-NEXT:    div.wu $a0, $a0, $a1
+; LA64-DIV32-NEXT:    ret
+  %3 = trunc i64 %0 to i32
+  %4 = trunc i64 %1 to i32
+  %5 = udiv i32 %3, %4
+  ret i32 %5
+}
+
+define dso_local signext range(i32 -2147483647, -2147483648) i32 @modw(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+; LA64-LABEL: modw:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.w $a0, $a0, $a1
+; LA64-NEXT:    ret
+;
+; LA64-DIV32-LABEL: modw:
+; LA64-DIV32:       # %bb.0:
+; LA64-DIV32-NEXT:    mod.w $a0, $a0, $a1
+; LA64-DIV32-NEXT:    ret
+  %3 = trunc i64 %0 to i32
+  %4 = trunc i64 %1 to i32
+  %5 = srem i32 %3, %4
+  ret i32 %5
+}
+
+define dso_local signext range(i32 0, -1) i32 @modwu(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+; LA64-LABEL: modwu:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
+; LA64-NEXT:    ret
+;
+; LA64-DIV32-LABEL: modwu:
+; LA64-DIV32:       # %bb.0:
+; LA64-DIV32-NEXT:    mod.wu $a0, $a0, $a1
+; LA64-DIV32-NEXT:    ret
+  %3 = trunc i64 %0 to i32
+  %4 = trunc i64 %1 to i32
+  %5 = urem i32 %3, %4
+  ret i32 %5
+}

>From b2bc7fada7ad999d79d5e3b6d61eee49ca9dbbec Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 20 Nov 2024 17:12:35 +0800
Subject: [PATCH 3/5] remove uncessary ir attributes

---
 .../LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
index d22b4e9c30fd61..6b66cc0001af27 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
@@ -2,7 +2,7 @@
 ; RUN: llc --mtriple=loongarch64 -mattr=+d,-div32 < %s | FileCheck %s --check-prefix=LA64
 ; RUN: llc --mtriple=loongarch64 -mattr=+d,+div32 < %s | FileCheck %s --check-prefix=LA64-DIV32
 
-define dso_local noundef signext i32 @divw(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+define i32 @divw(i64 %0, i64 %1) {
 ; LA64-LABEL: divw:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a1, $a1, 0
@@ -20,7 +20,7 @@ define dso_local noundef signext i32 @divw(i64 noundef %0, i64 noundef %1) local
   ret i32 %5
 }
 
-define dso_local noundef signext i32 @divwu(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+define i32  @divwu(i64 %0, i64 %1) {
 ; LA64-LABEL: divwu:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a1, $a1, 0
@@ -38,7 +38,7 @@ define dso_local noundef signext i32 @divwu(i64 noundef %0, i64 noundef %1) loca
   ret i32 %5
 }
 
-define dso_local signext range(i32 -2147483647, -2147483648) i32 @modw(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+define i32 @modw(i64 %0, i64 %1) {
 ; LA64-LABEL: modw:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a1, $a1, 0
@@ -56,7 +56,7 @@ define dso_local signext range(i32 -2147483647, -2147483648) i32 @modw(i64 nound
   ret i32 %5
 }
 
-define dso_local signext range(i32 0, -1) i32 @modwu(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+define i32 @modwu(i64 %0, i64 %1) {
 ; LA64-LABEL: modwu:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a1, $a1, 0

>From 97dcea32f4ed46492a0c8eb0dfc2609f90b933e8 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 26 Nov 2024 10:56:01 +0800
Subject: [PATCH 4/5] remove LoongArchISD:{DIV_W,MOD_W} , add div32 support in
 sys::getHostCPUFeatures & update  tests

---
 llvm/lib/Target/LoongArch/LoongArch.td        |  2 +-
 .../LoongArch/LoongArchISelLowering.cpp       | 11 +---
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 -
 .../Target/LoongArch/LoongArchInstrInfo.td    |  4 --
 llvm/lib/TargetParser/Host.cpp                |  2 +-
 .../sdiv-udiv-srem-urem-div32.ll              | 56 ++++++++++---------
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 24 ++++----
 7 files changed, 45 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index aa1741b0ccbaf5..463786e72bdf8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -126,7 +126,7 @@ def HasLD_SEQ_SA : Predicate<"Subtarget->hasLD_SEQ_SA()">;
 // Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended.
 def FeatureDiv32
     : SubtargetFeature<"div32", "HasDiv32", "true",
-                        "Support div.w[u] and mod.w[u] can handle inputs that are not sign-extended">;
+                        "Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended">;
 def HasDiv32 : Predicate<"Subtarget->hasDiv32()">;
 
 def TunePreferWInst
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c84619622270d5..6c30cc0d6cfb3e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -141,8 +141,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
-    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
-                       Custom);
+    setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
     setOperationAction(ISD::LROUND, MVT::i32, Custom);
   }
 
@@ -2632,12 +2631,8 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     llvm_unreachable("Unexpected opcode");
-  case ISD::SDIV:
-    return LoongArchISD::DIV_W;
   case ISD::UDIV:
     return LoongArchISD::DIV_WU;
-  case ISD::SREM:
-    return LoongArchISD::MOD_W;
   case ISD::UREM:
     return LoongArchISD::MOD_WU;
   case ISD::SHL:
@@ -2834,9 +2829,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
            "Unexpected custom legalisation");
     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
     break;
-  case ISD::SDIV:
   case ISD::UDIV:
-  case ISD::SREM:
   case ISD::UREM:
     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
@@ -4679,9 +4672,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(BITREV_W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
-    NODE_NAME_CASE(DIV_W)
     NODE_NAME_CASE(DIV_WU)
-    NODE_NAME_CASE(MOD_W)
     NODE_NAME_CASE(MOD_WU)
     NODE_NAME_CASE(CLZ_W)
     NODE_NAME_CASE(CTZ_W)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 5a47dfb257175f..1aa686695b49b8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,8 +44,6 @@ enum NodeType : unsigned {
   ROTR_W,
 
   // unsigned 32-bit integer division
-  DIV_W,
-  MOD_W,
   DIV_WU,
   MOD_WU,
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 0d290e10651c60..7993f4f1326937 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -85,9 +85,7 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
 def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
-def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>;
 def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
-def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>;
 def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
 def loongarch_crc_w_b_w
     : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
@@ -1158,12 +1156,10 @@ def : PatGprGpr<sub, SUB_D>;
 def : PatGprGpr<sdiv, DIV_D>;
 def : PatGprGpr_32<sdiv, DIV_W>;
 def : PatGprGpr<udiv, DIV_DU>;
-def : PatGprGpr<loongarch_div_w, DIV_W>;
 def : PatGprGpr<loongarch_div_wu, DIV_WU>;
 def : PatGprGpr<srem, MOD_D>;
 def : PatGprGpr_32<srem, MOD_W>;
 def : PatGprGpr<urem, MOD_DU>;
-def : PatGprGpr<loongarch_mod_w, MOD_W>;
 def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
 def : PatGprGpr<shiftop<rotr>, ROTR_D>;
 def : PatGprGpr<shiftopw<loongarch_rotr_w>, ROTR_W>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 51d6b7cb9b1fd6..05d97e03a67c9c 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -2025,12 +2025,12 @@ const StringMap<bool> sys::getHostCPUFeatures() {
   Features["lvz"] = hwcap & (1UL << 9);  // HWCAP_LOONGARCH_LVZ
 
   Features["frecipe"] = cpucfg2 & (1U << 25); // CPUCFG.2.FRECIPE
+  Features["div32"] = cpucfg2 & (1U << 26);   // CPUCFG.2.DIV32
   Features["lam-bh"] = cpucfg2 & (1U << 27);  // CPUCFG.2.LAM_BH
 
   Features["ld-seq-sa"] = cpucfg3 & (1U << 23); // CPUCFG.3.LD_SEQ_SA
 
   // TODO: Need to complete.
-  // Features["div32"] = cpucfg2 & (1U << 26);       // CPUCFG.2.DIV32
   // Features["lamcas"] = cpucfg2 & (1U << 28);      // CPUCFG.2.LAMCAS
   // Features["llacq-screl"] = cpucfg2 & (1U << 29); // CPUCFG.2.LLACQ_SCREL
   // Features["scq"] = cpucfg2 & (1U << 30);         // CPUCFG.2.SCQ
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
index 6b66cc0001af27..f858ee74568dab 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
@@ -2,25 +2,27 @@
 ; RUN: llc --mtriple=loongarch64 -mattr=+d,-div32 < %s | FileCheck %s --check-prefix=LA64
 ; RUN: llc --mtriple=loongarch64 -mattr=+d,+div32 < %s | FileCheck %s --check-prefix=LA64-DIV32
 
-define i32 @divw(i64 %0, i64 %1) {
+define i32 @divw(i64 %a, i64 %b) {
 ; LA64-LABEL: divw:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    div.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA64-DIV32-LABEL: divw:
 ; LA64-DIV32:       # %bb.0:
-; LA64-DIV32-NEXT:    div.w $a0, $a0, $a1
+; LA64-DIV32-NEXT:    addi.w $a0, $a0, 0
+; LA64-DIV32-NEXT:    addi.w $a1, $a1, 0
+; LA64-DIV32-NEXT:    div.d $a0, $a0, $a1
 ; LA64-DIV32-NEXT:    ret
-  %3 = trunc i64 %0 to i32
-  %4 = trunc i64 %1 to i32
-  %5 = sdiv i32 %3, %4
-  ret i32 %5
+  %conv1 = trunc i64 %a to i32
+  %conv2 = trunc i64 %b to i32
+  %r = sdiv i32 %conv1, %conv2
+  ret i32 %r
 }
 
-define i32  @divwu(i64 %0, i64 %1) {
+define i32  @divwu(i64 %a, i64 %b) {
 ; LA64-LABEL: divwu:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a1, $a1, 0
@@ -32,31 +34,33 @@ define i32  @divwu(i64 %0, i64 %1) {
 ; LA64-DIV32:       # %bb.0:
 ; LA64-DIV32-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-DIV32-NEXT:    ret
-  %3 = trunc i64 %0 to i32
-  %4 = trunc i64 %1 to i32
-  %5 = udiv i32 %3, %4
-  ret i32 %5
+  %conv1 = trunc i64 %a to i32
+  %conv2 = trunc i64 %b to i32
+  %r = udiv i32 %conv1, %conv2
+  ret i32 %r
 }
 
-define i32 @modw(i64 %0, i64 %1) {
+define i32 @modw(i64 %a, i64 %b) {
 ; LA64-LABEL: modw:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.w $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA64-DIV32-LABEL: modw:
 ; LA64-DIV32:       # %bb.0:
-; LA64-DIV32-NEXT:    mod.w $a0, $a0, $a1
+; LA64-DIV32-NEXT:    addi.w $a0, $a0, 0
+; LA64-DIV32-NEXT:    addi.w $a1, $a1, 0
+; LA64-DIV32-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-DIV32-NEXT:    ret
-  %3 = trunc i64 %0 to i32
-  %4 = trunc i64 %1 to i32
-  %5 = srem i32 %3, %4
-  ret i32 %5
+  %conv1 = trunc i64 %a to i32
+  %conv2 = trunc i64 %b to i32
+  %r = srem i32 %conv1, %conv2
+  ret i32 %r
 }
 
-define i32 @modwu(i64 %0, i64 %1) {
+define i32 @modwu(i64 %a, i64 %b) {
 ; LA64-LABEL: modwu:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a1, $a1, 0
@@ -68,8 +72,8 @@ define i32 @modwu(i64 %0, i64 %1) {
 ; LA64-DIV32:       # %bb.0:
 ; LA64-DIV32-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-DIV32-NEXT:    ret
-  %3 = trunc i64 %0 to i32
-  %4 = trunc i64 %1 to i32
-  %5 = urem i32 %3, %4
-  ret i32 %5
+  %conv1 = trunc i64 %a to i32
+  %conv2 = trunc i64 %b to i32
+  %r = urem i32 %conv1, %conv2
+  ret i32 %r
 }
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index 99824f6d7718e7..c5af79157eaadc 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -121,7 +121,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    div.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_i32:
@@ -137,7 +137,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB3_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -156,7 +156,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: sdiv_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    div.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_ui32_si32_si32:
@@ -170,7 +170,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: sdiv_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB4_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -693,7 +693,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.w $a0, $a0, $a1
+; LA64-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_i32:
@@ -709,7 +709,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB19_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -728,7 +728,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: srem_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    mod.w $a0, $a0, $a1
+; LA64-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_ui32_si32_si32:
@@ -742,7 +742,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: srem_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB20_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -763,7 +763,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.w $a0, $a0, $a1
+; LA64-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_si32_ui32_ui32:
@@ -779,7 +779,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB21_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -798,7 +798,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: srem_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    mod.w $a0, $a0, $a1
+; LA64-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_si32_si32_si32:
@@ -812,7 +812,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: srem_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB22_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7

>From 20c9378ac7885390d39070449c7b9aa56ed6dd43 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 26 Nov 2024 16:50:46 +0800
Subject: [PATCH 5/5] add a todo comment

---
 .../LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll       | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
index f858ee74568dab..b7745ff5e0c42f 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem-div32.ll
@@ -2,6 +2,8 @@
 ; RUN: llc --mtriple=loongarch64 -mattr=+d,-div32 < %s | FileCheck %s --check-prefix=LA64
 ; RUN: llc --mtriple=loongarch64 -mattr=+d,+div32 < %s | FileCheck %s --check-prefix=LA64-DIV32
 
+; TODO: Use div.w/mod.w for sdiv/srem i32
+
 define i32 @divw(i64 %a, i64 %b) {
 ; LA64-LABEL: divw:
 ; LA64:       # %bb.0:



More information about the llvm-commits mailing list