[llvm] 24810ac - [RISCV] Add isel patterns to select slli+shXadd.uw.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 21 09:32:59 PST 2022
Author: Craig Topper
Date: 2022-11-21T09:32:51-08:00
New Revision: 24810acb629d05eb3253a52a7220486b5b1d6791
URL: https://github.com/llvm/llvm-project/commit/24810acb629d05eb3253a52a7220486b5b1d6791
DIFF: https://github.com/llvm/llvm-project/commit/24810acb629d05eb3253a52a7220486b5b1d6791.diff
LOG: [RISCV] Add isel patterns to select slli+shXadd.uw.
This matches what we get for IR such as the following:
%0 = shl i32 %x, C
%1 = zext i32 %0 to i64
%2 = getelementptr i32, ptr %y, i64 %1
The shift before the zext and the shift implied by the GEP get
combined with an AND after them. We need to split it back into
2 shifts so we can fold one into shXadd.uw.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D137886
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
llvm/test/CodeGen/RISCV/rv64zba.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index aa2ccefe45f28..70e4f344434dc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2234,6 +2234,43 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
return false;
}
+/// Look for various patterns that can be done with a SHL that can be folded
+/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
+/// SHXADD_UW we are trying to match.
+bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
+ SDValue &Val) {
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
+ N.hasOneUse()) {
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.hasOneUse()) {
+ uint64_t Mask = N.getConstantOperandVal(1);
+ unsigned C2 = N0.getConstantOperandVal(1);
+
+ Mask &= maskTrailingZeros<uint64_t>(C2);
+
+ // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
+ // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
+ // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
+ if (isShiftedMask_64(Mask)) {
+ unsigned Leading = countLeadingZeros(Mask);
+ unsigned Trailing = countTrailingZeros(Mask);
+ if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
+ 0);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 9dae3b4c08290..8683deeaed7a5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -70,6 +70,11 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
return selectSHXADDOp(N, ShAmt, Val);
}
+ bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val);
+ template <unsigned ShAmt> bool selectSHXADD_UWOp(SDValue N, SDValue &Val) {
+ return selectSHXADD_UWOp(N, ShAmt, Val);
+ }
+
bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 3ea19eafd0a5f..507cb3912180e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -257,6 +257,10 @@ def sh1add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<1>", [], [], 6>;
def sh2add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<2>", [], [], 6>;
def sh3add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<3>", [], [], 6>;
+def sh1add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<1>", [], [], 6>;
+def sh2add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<2>", [], [], 6>;
+def sh3add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<3>", [], [], 6>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -771,6 +775,14 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2))
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+// More complex cases use a ComplexPattern.
+def : Pat<(add sh1add_uw_op:$rs1, non_imm12:$rs2),
+ (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>;
+def : Pat<(add sh2add_uw_op:$rs1, non_imm12:$rs2),
+ (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>;
+def : Pat<(add sh3add_uw_op:$rs1, non_imm12:$rs2),
+ (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>;
+
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)),
(SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)),
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 6fa4e37782b37..a283424571d85 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1716,3 +1716,69 @@ define i64 @srli_4_sh3add(i64* %0, i64 %1) {
%5 = load i64, i64* %4, align 8
ret i64 %5
}
+
+define signext i16 @shl_2_sh1add(i16* %0, i32 signext %1) {
+; RV64I-LABEL: shl_2_sh1add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 34
+; RV64I-NEXT: srli a1, a1, 31
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lh a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: shl_2_sh1add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli a1, a1, 2
+; RV64ZBA-NEXT: sh1add.uw a0, a1, a0
+; RV64ZBA-NEXT: lh a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = shl i32 %1, 2
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i16, i16* %0, i64 %4
+ %6 = load i16, i16* %5, align 2
+ ret i16 %6
+}
+
+define signext i32 @shl_16_sh2add(i32* %0, i32 signext %1) {
+; RV64I-LABEL: shl_16_sh2add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 48
+; RV64I-NEXT: srli a1, a1, 30
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: shl_16_sh2add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli a1, a1, 16
+; RV64ZBA-NEXT: sh2add.uw a0, a1, a0
+; RV64ZBA-NEXT: lw a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = shl i32 %1, 16
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i32, i32* %0, i64 %4
+ %6 = load i32, i32* %5, align 4
+ ret i32 %6
+}
+
+define i64 @shl_31_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: shl_31_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: srli a1, a1, 29
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: shl_31_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli a1, a1, 31
+; RV64ZBA-NEXT: sh3add.uw a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = shl i32 %1, 31
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i64, i64* %0, i64 %4
+ %6 = load i64, i64* %5, align 8
+ ret i64 %6
+}
More information about the llvm-commits
mailing list