[llvm] a027bea - [LoongArch] Select {DIV,MOD}.{W,WU} instruction to eliminate explicit sign extension (#92205)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 00:24:55 PDT 2024


Author: hev
Date: 2024-05-20T15:24:52+08:00
New Revision: a027bea438e285380450f5b380be072f44ee0312

URL: https://github.com/llvm/llvm-project/commit/a027bea438e285380450f5b380be072f44ee0312
DIFF: https://github.com/llvm/llvm-project/commit/a027bea438e285380450f5b380be072f44ee0312.diff

LOG: [LoongArch] Select {DIV,MOD}.{W,WU} instruction to eliminate explicit sign extension (#92205)

Added: 
    

Modified: 
    llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
    llvm/lib/Target/LoongArch/LoongArchISelLowering.h
    llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
    llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index fe2c613b1b30f..8a87c82a205bb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -139,6 +139,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
+    setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
   }
 
   // Set operations for LA32 only.
@@ -1665,6 +1666,10 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     llvm_unreachable("Unexpected opcode");
+  case ISD::UDIV:
+    return LoongArchISD::DIV_WU;
+  case ISD::UREM:
+    return LoongArchISD::MOD_WU;
   case ISD::SHL:
     return LoongArchISD::SLL_W;
   case ISD::SRA:
@@ -1841,6 +1846,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to legalize this operation");
+  case ISD::UDIV:
+  case ISD::UREM:
+    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
+    break;
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:
@@ -3445,6 +3456,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(BITREV_W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
+    NODE_NAME_CASE(DIV_WU)
+    NODE_NAME_CASE(MOD_WU)
     NODE_NAME_CASE(CLZ_W)
     NODE_NAME_CASE(CTZ_W)
     NODE_NAME_CASE(DBAR)

diff  --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index de3f45172e25a..f274b1971fd23 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -43,6 +43,10 @@ enum NodeType : unsigned {
   ROTL_W,
   ROTR_W,
 
+  // Unsigned 32-bit integer division and remainder
+  DIV_WU,
+  MOD_WU,
+
   // FPR<->GPR transfer operations
   MOVGR2FR_W_LA64,
   MOVFR2GR_S_LA64,

diff  --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index f56f8f7e1179c..35ea9f07866d5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -85,6 +85,8 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
 def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
+def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
 def loongarch_crc_w_b_w
     : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
 def loongarch_crc_w_h_w
@@ -1110,9 +1112,13 @@ def : PatGprImm_32<add, ADDI_W, simm12>;
 def : PatGprGpr<sub, SUB_D>;
 def : PatGprGpr_32<sub, SUB_W>;
 def : PatGprGpr<sdiv, DIV_D>;
+def : PatGprGpr_32<sdiv, DIV_W>;
 def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr<loongarch_div_wu, DIV_WU>;
 def : PatGprGpr<srem, MOD_D>;
+def : PatGprGpr_32<srem, MOD_W>;
 def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
 def : PatGprGpr<rotr, ROTR_D>;
 def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
 def : PatGprGpr_32<rotr, ROTR_W>;

diff  --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index 2064c398948fe..ab3eec240db3c 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -228,8 +226,7 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: sdiv_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_si32_si32:
@@ -243,12 +240,11 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: sdiv_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB6_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB6_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -407,9 +403,9 @@ define i32 @udiv_i32(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: udiv_i32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    div.du $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: udiv_i32:
@@ -423,9 +419,9 @@ define i32 @udiv_i32(i32 %a, i32 %b) {
 ;
 ; LA64-TRAP-LABEL: udiv_i32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
+; LA64-TRAP-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB11_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -444,9 +440,7 @@ define i32 @udiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: udiv_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    div.du $a0, $a0, $a1
+; LA64-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: udiv_ui32_si32_si32:
@@ -460,9 +454,7 @@ define i32 @udiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: udiv_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB12_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -481,10 +473,9 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: udiv_si32_ui32_ui32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    div.du $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: udiv_si32_ui32_ui32:
@@ -498,14 +489,13 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ;
 ; LA64-TRAP-LABEL: udiv_si32_ui32_ui32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
+; LA64-TRAP-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB13_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB13_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = udiv i32 %a, %b
@@ -520,10 +510,7 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: udiv_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    div.du $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: udiv_si32_si32_si32:
@@ -537,14 +524,11 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: udiv_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    div.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB14_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB14_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = udiv i32 %a, %b
@@ -995,9 +979,9 @@ define i32 @urem_i32(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: urem_i32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    mod.du $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: urem_i32:
@@ -1011,9 +995,9 @@ define i32 @urem_i32(i32 %a, i32 %b) {
 ;
 ; LA64-TRAP-LABEL: urem_i32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
+; LA64-TRAP-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB27_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -1032,9 +1016,7 @@ define i32 @urem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: urem_ui32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    mod.du $a0, $a0, $a1
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: urem_ui32_si32_si32:
@@ -1048,9 +1030,7 @@ define i32 @urem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: urem_ui32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB28_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
@@ -1069,10 +1049,9 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: urem_si32_ui32_ui32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    mod.du $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: urem_si32_ui32_ui32:
@@ -1086,14 +1065,13 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
 ;
 ; LA64-TRAP-LABEL: urem_si32_ui32_ui32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
+; LA64-TRAP-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB29_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB29_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = urem i32 %a, %b
@@ -1108,10 +1086,7 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-LABEL: urem_si32_si32_si32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT:    mod.du $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: urem_si32_si32_si32:
@@ -1125,14 +1100,11 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
 ;
 ; LA64-TRAP-LABEL: urem_si32_si32_si32:
 ; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT:    bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT:    mod.du $a0, $a0, $a1
+; LA64-TRAP-NEXT:    mod.wu $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB30_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB30_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = urem i32 %a, %b


        


More information about the llvm-commits mailing list