[PATCH] D129688: [RISCV] Fold (sra (sext_inreg (shl X, C1), i32), C2) -> (sra (shl X, C1+32), C2+32).

Wed Jul 13 12:54:41 PDT 2022

craig.topper created this revision.
craig.topper added reviewers: reames, asb, luismarques.
Herald added subscribers: sunshaoce, VincentWu, luke957, StephenFan, vkmr, frasercrmck, evandro, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, niosHD, sabuasal, simoncook, johnrusso, rbar, hiraditya, arichardson.
Herald added a project: All.
craig.topper requested review of this revision.
Herald added subscribers: pcwang-thead, eopXD, MaskRay.
Herald added a project: LLVM.

The former pattern is selected as slliw+sraiw, while the latter is
selected as slli+srai. Unlike slliw/sraiw, slli and srai have compressed
forms, so this gives the shift pair the possibility of being compressed.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D129688

Files:
  llvm/lib/Target/RISCV/RISCVISelLowering.cpp
  llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll


Index: llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
===================================================================

--- llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
+++ llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
@@ -159,13 +159,12 @@
   ret i64 %5
 }
 
-; TODO: We should use slli+srai to enable the possibility of compressed
-; instructions.
+; Make sure we use slli+srai to enable the possibility of compressed instructions.
 define i32 @test12(i32 signext %0) {
 ; RV64I-LABEL: test12:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slliw a0, a0, 17
-; RV64I-NEXT:    sraiw a0, a0, 15
+; RV64I-NEXT:    slli a0, a0, 49
+; RV64I-NEXT:    srai a0, a0, 47
 ; RV64I-NEXT:    ret
   %2 = shl i32 %0, 17
   %3 = ashr i32 %2, 15
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8564,12 +8564,6 @@
   return Opcode;
 }
 
-// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
-// FIXME: Should this be a generic combine? There's a similar combine on X86.
-//
-// Also try these folds where an add or sub is in the middle.
-// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
-// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
@@ -8577,12 +8571,40 @@
   if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
     return SDValue();
 
-  auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (!ShAmtC || ShAmtC->getZExtValue() > 32)
+  if (!isa<ConstantSDNode>(N->getOperand(1)))
+    return SDValue();
+  uint64_t ShAmt = N->getConstantOperandVal(1);
+  if (ShAmt > 32)
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
 
+  // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
+  // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
+  // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
+  if (ShAmt < 32 &&
+      N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
+      cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
+      N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
+      isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+    uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
+    if (LShAmt < 32) {
+      SDLoc ShlDL(N0.getOperand(0));
+      SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
+                                N0.getOperand(0).getOperand(0),
+                                DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
+      SDLoc DL(N);
+      return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
+                         DAG.getConstant(ShAmt + 32, DL, MVT::i64));
+    }
+  }
+
+  // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
+  // FIXME: Should this be a generic combine? There's a similar combine on X86.
+  //
+  // Also try these folds where an add or sub is in the middle.
+  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
+  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
   SDValue Shl;
   ConstantSDNode *AddC = nullptr;
 
@@ -8628,12 +8650,12 @@
 
   SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                              DAG.getValueType(MVT::i32));
-  if (ShAmtC->getZExtValue() == 32)
+  if (ShAmt == 32)
     return SExt;
 
   return DAG.getNode(
       ISD::SHL, DL, MVT::i64, SExt,
-      DAG.getConstant(32 - ShAmtC->getZExtValue(), DL, MVT::i64));
+      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
 }
 
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D129688.444396.patch
Type: text/x-patch
Size: 3843 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220713/37064382/attachment-0001.bin>