[llvm] 73e5b9e - [RISCV] Select (srl (sext_inreg X, i32), uimm5) to SRAIW if only lower 32 bits are used.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 16 11:04:09 PDT 2021


Author: Craig Topper
Date: 2021-09-16T11:03:35-07:00
New Revision: 73e5b9ea90ba857dd7f0f6b79dc39dfc90ad66ea

URL: https://github.com/llvm/llvm-project/commit/73e5b9ea90ba857dd7f0f6b79dc39dfc90ad66ea
DIFF: https://github.com/llvm/llvm-project/commit/73e5b9ea90ba857dd7f0f6b79dc39dfc90ad66ea.diff

LOG: [RISCV] Select (srl (sext_inreg X, i32), uimm5) to SRAIW if only lower 32 bits are used.

SimplifyDemandedBits can turn srl into sra if the bits being shifted
in aren't demanded. This patch can recover the original sra in some cases.

I've renamed the tablegen class for detecting W users since the "overflowing operator"
term I originally borrowed from Operator.h does not include srl.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D109162

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/lib/Target/RISCV/RISCVInstrInfoM.td
    llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
    llvm/test/CodeGen/RISCV/srem-lkk.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 984e40fdb5bdf..d4d460160b9f1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1536,6 +1536,7 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
+          Node->getOpcode() == ISD::SRL ||
           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
           isa<ConstantSDNode>(Node)) &&
          "Unexpected opcode");

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 86e715ffdb75e..575df86d27a8d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1358,7 +1358,7 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
 
 // PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
 // if only the lower 32 bits of their result is used.
-class overflowingbinopw<SDPatternOperator operator>
+class binop_allwusers<SDPatternOperator operator>
     : PatFrag<(ops node:$lhs, node:$rhs),
               (operator node:$lhs, node:$rhs), [{
   return hasAllWUsers(Node);
@@ -1393,12 +1393,17 @@ def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
 def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
 def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
 
-// Select W instructions without sext_inreg if only the lower 32 bits of the
-// result are used.
-def : PatGprGpr<overflowingbinopw<add>, ADDW>;
-def : PatGprSimm12<overflowingbinopw<add>, ADDIW>;
-def : PatGprGpr<overflowingbinopw<sub>, SUBW>;
-def : PatGprImm<overflowingbinopw<shl>, SLLIW, uimm5>;
+// Select W instructions if only the lower 32 bits of the result are used.
+def : PatGprGpr<binop_allwusers<add>, ADDW>;
+def : PatGprSimm12<binop_allwusers<add>, ADDIW>;
+def : PatGprGpr<binop_allwusers<sub>, SUBW>;
+def : PatGprImm<binop_allwusers<shl>, SLLIW, uimm5>;
+
+// If this is a shr of a value sign extended from i32, and all the users only
+// use the lower 32 bits, we can use an sraiw to remove the sext_inreg. This
+// occurs because SimplifyDemandedBits prefers srl over sra.
+def : Pat<(binop_allwusers<srl> (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
+          (SRAIW GPR:$rs1, uimm5:$shamt)>;
 
 /// Loads
 
@@ -1441,7 +1446,7 @@ def : Pat<(add GPR:$rs1, (AddiPair:$rs2)),
 
 let Predicates = [IsRV64] in {
 // Select W instructions if only the lower 32-bits of the result are used.
-def : Pat<(overflowingbinopw<add> GPR:$rs1, (AddiPair:$rs2)),
+def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
           (ADDIW (ADDIW GPR:$rs1, (AddiPairImmB AddiPair:$rs2)),
                  (AddiPairImmA AddiPair:$rs2))>;
 }

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index e03536027ec00..147a70a809d1d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -72,9 +72,8 @@ def : PatGprGpr<urem, REMU>;
 } // Predicates = [HasStdExtM]
 
 let Predicates = [HasStdExtM, IsRV64] in {
-// Select W instructions without sext_inreg if only the lower 32-bits of the
-// result are used.
-def : PatGprGpr<overflowingbinopw<mul>, MULW>;
+// Select W instructions if only the lower 32-bits of the result are used.
+def : PatGprGpr<binop_allwusers<mul>, MULW>;
 
 def : PatGprGpr<riscv_divw, DIVW>;
 def : PatGprGpr<riscv_divuw, DIVUW>;

diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index c4b15760d6823..f0bcecdc75013 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -1964,8 +1964,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
 ;
 ; RV64ZBA-LABEL: zext_sraiw_aext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    sext.w a0, a0
-; RV64ZBA-NEXT:    srli a0, a0, 7
+; RV64ZBA-NEXT:    sraiw a0, a0, 7
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = ashr i32 %a, 7
@@ -1999,8 +1998,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
 ;
 ; RV64ZBA-LABEL: zext_sraiw_zext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    sext.w a0, a0
-; RV64ZBA-NEXT:    srli a0, a0, 9
+; RV64ZBA-NEXT:    sraiw a0, a0, 9
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = ashr i32 %a, 9

diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index a890e04008b90..ac23823de2692 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -53,7 +53,7 @@ define i32 @fold_srem_positive_odd(i32 %x) nounwind {
 ; RV64IM-NEXT:    srli a1, a1, 32
 ; RV64IM-NEXT:    addw a1, a1, a0
 ; RV64IM-NEXT:    srliw a2, a1, 31
-; RV64IM-NEXT:    srli a1, a1, 6
+; RV64IM-NEXT:    sraiw a1, a1, 6
 ; RV64IM-NEXT:    addw a1, a1, a2
 ; RV64IM-NEXT:    addi a2, zero, 95
 ; RV64IM-NEXT:    mulw a1, a1, a2


        


More information about the llvm-commits mailing list