[llvm] Use div.w/mod.w to eliminate unnecessary sign-extend for sdiv/srem i32. (PR #117298)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 23:07:25 PST 2024


https://github.com/tangaac created https://github.com/llvm/llvm-project/pull/117298

None

>From 39b432cc711d258b30da615a5eafcffd2796e88a Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 22 Nov 2024 12:50:20 +0800
Subject: [PATCH] Use div.w/mod.w to eliminate unnecessary sign-extend
 instructions for sdiv/srem i32 .

---
 .../LoongArch/LoongArchISelLowering.cpp       | 11 ++++++++-
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 ++
 .../Target/LoongArch/LoongArchInstrInfo.td    |  4 ++++
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 24 +++++++++----------
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5c567ed4a6f724..a202d9e9b5b50c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -141,7 +141,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
-    setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
+    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+                       Custom);
     setOperationAction(ISD::LROUND, MVT::i32, Custom);
   }
 
@@ -2629,8 +2630,12 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     llvm_unreachable("Unexpected opcode");
+  case ISD::SDIV:
+    return LoongArchISD::DIV_W;
   case ISD::UDIV:
     return LoongArchISD::DIV_WU;
+  case ISD::SREM:
+    return LoongArchISD::MOD_W;
   case ISD::UREM:
     return LoongArchISD::MOD_WU;
   case ISD::SHL:
@@ -2827,7 +2832,9 @@ void LoongArchTargetLowering::ReplaceNodeResults(
            "Unexpected custom legalisation");
     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
     break;
+  case ISD::SDIV:
   case ISD::UDIV:
+  case ISD::SREM:
   case ISD::UREM:
     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
@@ -4667,7 +4674,9 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(BITREV_W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
+    NODE_NAME_CASE(DIV_W)
     NODE_NAME_CASE(DIV_WU)
+    NODE_NAME_CASE(MOD_W)
     NODE_NAME_CASE(MOD_WU)
     NODE_NAME_CASE(CLZ_W)
     NODE_NAME_CASE(CTZ_W)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 1aa686695b49b8..5a47dfb257175f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,8 @@ enum NodeType : unsigned {
   ROTR_W,
 
   // unsigned 32-bit integer division
+  DIV_W,
+  MOD_W,
   DIV_WU,
   MOD_WU,
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index cd1500229f4aa9..b9d4f00717a565 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -85,7 +85,9 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
 def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>;
 def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
+def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>;
 def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
 def loongarch_crc_w_b_w
     : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
@@ -1156,10 +1158,12 @@ def : PatGprGpr<sub, SUB_D>;
 def : PatGprGpr<sdiv, DIV_D>;
 def : PatGprGpr_32<sdiv, DIV_W>;
 def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr<loongarch_div_w, DIV_W>;
 def : PatGprGpr<loongarch_div_wu, DIV_WU>;
 def : PatGprGpr<srem, MOD_D>;
 def : PatGprGpr_32<srem, MOD_W>;
 def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr<loongarch_mod_w, MOD_W>;
 def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
 def : PatGprGpr<shiftop<rotr>, ROTR_D>;
 def : PatGprGpr<shiftopw<loongarch_rotr_w>, ROTR_W>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c5af79157eaadc..99824f6d7718e7 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -121,7 +121,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_i32:
@@ -137,7 +137,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB3_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -156,7 +156,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: sdiv_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_ui32_si32_si32:
@@ -170,7 +170,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: sdiv_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB4_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -693,7 +693,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_i32:
@@ -709,7 +709,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB19_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -728,7 +728,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: srem_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_ui32_si32_si32:
@@ -742,7 +742,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: srem_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB20_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -763,7 +763,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_si32_ui32_ui32:
@@ -779,7 +779,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB21_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -798,7 +798,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: srem_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    mod.d $a0, $a0, $a1
+; LA64-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: srem_si32_si32_si32:
@@ -812,7 +812,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: srem_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB22_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7



More information about the llvm-commits mailing list