[llvm] d4f4a1b - [RISCV] Add DAG combine to detect opportunities to replace (i64 (any_extend (i32 X))) with sign_extend.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 25 23:18:42 PDT 2021
Author: Craig Topper
Date: 2021-06-25T23:16:37-07:00
New Revision: d4f4a1ba626d7c3e4442d6f68feb79d56eba9601
URL: https://github.com/llvm/llvm-project/commit/d4f4a1ba626d7c3e4442d6f68feb79d56eba9601
DIFF: https://github.com/llvm/llvm-project/commit/d4f4a1ba626d7c3e4442d6f68feb79d56eba9601.diff
LOG: [RISCV] Add DAG combine to detect opportunities to replace (i64 (any_extend (i32 X))) with sign_extend.
If type legalization is going to insert a sign_extend for other users
of X and we can fold the sign_extend into ADDW/MULW/SUBW, it is
better to replace the ANY_EXTEND so we don't end up with a separate
ADD/MUL/SUB instruction for the users of the ANY_EXTEND.
I'm only handling setcc uses right now, but there are other
instructions that force sign_extends like ashr.
There are probably other *W instructions we could use in addition
to ADDW/SUBW/MULW.
My motivating case was a loop terminating compare and a phi use
as seen in the new test file.
Reviewed By: asb
Differential Revision: https://reviews.llvm.org/D104581
Added:
llvm/test/CodeGen/RISCV/aext-to-sext.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rv64zbb.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8a2e96d44399a..7f9d0cf4637df 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -836,6 +836,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
if (Subtarget.hasStdExtV()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
@@ -5606,6 +5607,83 @@ static SDValue performXORCombine(SDNode *N,
return combineSelectCCAndUseCommutative(N, DAG, false);
}
+// Try to replace (i64 (any_extend (i32 X))) with (i64 (sign_extend X)) when X is
+// an ADD/SUB/MUL that also feeds at least one SETCC. The SETCC users are rebuilt
+// on sign-extended i64 operands so they share the extend, which ADDW/SUBW/MULW
+// can provide for free. Without this the ANY_EXTEND would be removed during type
+// legalization leaving an ADD/SUB/MUL use that won't use ADDW/SUBW/MULW.
+static SDValue performANY_EXTENDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.is64Bit())
+ return SDValue(); // Only attempted on 64-bit subtargets.
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
+ return SDValue(); // Only the i32 -> i64 extend is handled.
+
+ // Src must be an opcode that can implicitly sign_extend (ADDW/SUBW/MULW).
+ // FIXME: Additional opcodes.
+ switch (Src.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::MUL:
+ if (!Subtarget.hasStdExtM())
+ return SDValue(); // MUL is only accepted when the subtarget reports the M extension.
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
+ case ISD::SUB:
+ break;
+ }
+
+ SmallVector<SDNode *, 4> SetCCs;
+ for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
+ UE = Src.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue; // Skip the any_extend that is being combined.
+ if (UI.getUse().getResNo() != Src.getResNo())
+ continue; // This use refers to a different result of Src's node.
+ // All i32 setccs are legalized by sign extending operands, so record it.
+ if (User->getOpcode() == ISD::SETCC) {
+ SetCCs.push_back(User);
+ continue;
+ }
+ // Unknown user: stop scanning. SetCCs gathered so far are still promoted.
+ break;
+ }
+
+ // If we don't have any SetCCs, this isn't worthwhile.
+ if (SetCCs.empty())
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
+ DCI.CombineTo(N, SExt); // Replace the any_extend with the new sign_extend.
+
+ // Rebuild each recorded setcc on i64 operands, reusing SExt where it used Src.
+ for (SDNode *SetCC : SetCCs) {
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Src)
+ Ops.push_back(SExt);
+ else
+ Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp)); // Other operand gets its own sign_extend.
+ }
+
+ Ops.push_back(SetCC->getOperand(2)); // Operand 2 is carried over unchanged.
+ DCI.CombineTo(SetCC,
+ DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+ }
+ return SDValue(N, 0); // N was already replaced through CombineTo above.
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5830,6 +5908,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget);
case ISD::XOR:
return performXORCombine(N, DCI, Subtarget);
+ case ISD::ANY_EXTEND:
+ return performANY_EXTENDCombine(N, DCI, Subtarget);
case RISCVISD::SELECT_CC: {
// Transform
SDValue LHS = N->getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
new file mode 100644
index 0000000000000..45b30dd60a3d5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+; Make sure we don't generate an addi in the loop in
+; addition to the addiw. Previously we type legalized the
+; setcc use using signext and the phi use using anyext.
+; We now detect when it would be beneficial to replace
+; anyext with signext.
+
+define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
+; RV64I-LABEL: quux:
+; RV64I: # %bb.0: # %bb
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: beq a0, a1, .LBB0_3
+; RV64I-NEXT: # %bb.1: # %bb2.preheader
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: .LBB0_2: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: call hoge@plt
+; RV64I-NEXT: addiw s1, s1, 1
+; RV64I-NEXT: bne s1, s0, .LBB0_2
+; RV64I-NEXT: .LBB0_3: # %bb6
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+bb:
+ %tmp = icmp eq i32 %arg, %arg1
+ br i1 %tmp, label %bb6, label %bb2
+
+bb2: ; preds = %bb2, %bb
+ %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ]
+ tail call void @hoge()
+ %tmp4 = add nsw i32 %tmp3, 1
+ %tmp5 = icmp eq i32 %tmp4, %arg1
+ br i1 %tmp5, label %bb6, label %bb2
+
+bb6: ; preds = %bb2, %bb
+ ret void
+}
+
+declare void @hoge()
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 97a89c35895be..7a20bf0f0320c 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -193,12 +193,11 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addiw a2, a0, -1
+; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: addi s0, zero, 32
; RV64I-NEXT: addi a1, zero, 32
-; RV64I-NEXT: beqz a2, .LBB2_2
+; RV64I-NEXT: beqz a0, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
More information about the llvm-commits
mailing list