[llvm] a41c1f3 - [RISCV] Make selectShiftMask look for negate opportunities after looking through AND.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 23 14:24:19 PDT 2022
Author: Craig Topper
Date: 2022-10-23T14:23:13-07:00
New Revision: a41c1f31689b4d95b43cbfccdd0040d134b1922f
URL: https://github.com/llvm/llvm-project/commit/a41c1f31689b4d95b43cbfccdd0040d134b1922f
DIFF: https://github.com/llvm/llvm-project/commit/a41c1f31689b4d95b43cbfccdd0040d134b1922f.diff
LOG: [RISCV] Make selectShiftMask look for negate opportunities after looking through AND.
Previously we would only look for an AND or a negate, but it's possible
there is a negate after looking through the AND.
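
To illustrate, the rotl_64_zext test added below has exactly this shape: the
rotate amount is (64 - y) zero-extended to i64, so in the DAG the SUB from 64
ends up behind the zero-extension mask (an AND) on the shift amount:

  %z = sub i32 64, %y
  %zext = zext i32 %z to i64
  %zexty = zext i32 %y to i64
  %b = shl i64 %x, %zexty
  %c = lshr i64 %x, %zext
  %d = or i64 %b, %c
  ret i64 %d

Once the AND is looked through, the remaining shift amount is a SUB whose
left operand (64) is a multiple of the shift width, so it can be selected as
a negate of %y (negw on RV64) instead of materializing the constant and
subtracting.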
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/test/CodeGen/RISCV/rotl-rotr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d62098d45717..e33bcc5e04e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2061,11 +2061,13 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
+ ShAmt = N;
+
// Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
// amount. If there is an AND on the shift amount, we can bypass it if it
// doesn't affect any of those bits.
- if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
- const APInt &AndMask = N->getConstantOperandAPInt(1);
+ if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
+ const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
// Since the max shift amount is a power of 2 we can subtract 1 to make a
// mask that covers the bits needed to represent all shift amounts.
@@ -2073,35 +2075,34 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
if (ShMask.isSubsetOf(AndMask)) {
- ShAmt = N.getOperand(0);
- return true;
+ ShAmt = ShAmt.getOperand(0);
+ } else {
+ // SimplifyDemandedBits may have optimized the mask so try restoring any
+ // bits that are known zero.
+ KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
+ if (!ShMask.isSubsetOf(AndMask | Known.Zero))
+ return true;
+ ShAmt = ShAmt.getOperand(0);
}
+ }
- // SimplifyDemandedBits may have optimized the mask so try restoring any
- // bits that are known zero.
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
- if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
- ShAmt = N.getOperand(0);
- return true;
- }
- } else if (N.getOpcode() == ISD::SUB &&
- isa<ConstantSDNode>(N.getOperand(0))) {
- uint64_t Imm = N.getConstantOperandVal(0);
+ if (ShAmt.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(ShAmt.getOperand(0))) {
+ uint64_t Imm = ShAmt.getConstantOperandVal(0);
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
// generate a NEG instead of a SUB of a constant.
if (Imm != 0 && Imm % ShiftWidth == 0) {
- SDLoc DL(N);
- EVT VT = N.getValueType();
+ SDLoc DL(ShAmt);
+ EVT VT = ShAmt.getValueType();
SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
- N.getOperand(1));
+ ShAmt.getOperand(1));
ShAmt = SDValue(Neg, 0);
return true;
}
}
- ShAmt = N;
return true;
}
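
For readers reconstructing the result from the interleaved hunks above, here
is a simplified, hand-reassembled view of selectShiftMask after this patch
(the hunks remain the authoritative version; a small amount of unchanged code
between the two hunks is omitted):

  bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                          SDValue &ShAmt) {
    ShAmt = N;

    // Shift instructions on RISCV only read the lower 5 or 6 bits of the
    // shift amount, so an AND that doesn't affect those bits can be bypassed.
    if (ShAmt.getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(ShAmt.getOperand(1))) {
      const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
      APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
      if (ShMask.isSubsetOf(AndMask)) {
        ShAmt = ShAmt.getOperand(0);
      } else {
        // SimplifyDemandedBits may have shrunk the mask; try restoring any
        // bits that are known zero before giving up on the AND.
        KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
        if (!ShMask.isSubsetOf(AndMask | Known.Zero))
          return true;
        ShAmt = ShAmt.getOperand(0);
      }
    }

    // New in this patch: the negate check runs on whatever ShAmt became
    // above, so a (sub C, X) hidden behind the AND is also caught.
    if (ShAmt.getOpcode() == ISD::SUB &&
        isa<ConstantSDNode>(ShAmt.getOperand(0))) {
      uint64_t Imm = ShAmt.getConstantOperandVal(0);
      // Shifting by C-X with C == 0 mod ShiftWidth is the same as shifting
      // by -X, which only needs a NEG instead of a SUB of a constant.
      if (Imm != 0 && Imm % ShiftWidth == 0) {
        SDLoc DL(ShAmt);
        EVT VT = ShAmt.getValueType();
        SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
        unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
        MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
                                                    ShAmt.getOperand(1));
        ShAmt = SDValue(Neg, 0);
        return true;
      }
    }

    return true;
  }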
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index 19480e6c65e5..24c01605ae73 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -1623,3 +1623,215 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
%3 = add i64 %1, %2
ret i64 %3
}
+
+define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
+; RV32I-LABEL: rotl_64_zext:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: sll a5, a0, a2
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: slti a6, a3, 0
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: bltz a3, .LBB24_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sll a3, a0, a3
+; RV32I-NEXT: j .LBB24_3
+; RV32I-NEXT: .LBB24_2:
+; RV32I-NEXT: sll a3, a1, a2
+; RV32I-NEXT: xori a7, a2, 31
+; RV32I-NEXT: srli t0, a0, 1
+; RV32I-NEXT: srl a7, t0, a7
+; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: .LBB24_3:
+; RV32I-NEXT: and a5, a6, a5
+; RV32I-NEXT: li a6, 32
+; RV32I-NEXT: sub a7, a6, a2
+; RV32I-NEXT: srl a6, a1, a4
+; RV32I-NEXT: bltz a7, .LBB24_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: mv a0, a6
+; RV32I-NEXT: j .LBB24_6
+; RV32I-NEXT: .LBB24_5:
+; RV32I-NEXT: li t0, 64
+; RV32I-NEXT: sub a2, t0, a2
+; RV32I-NEXT: srl a0, a0, a4
+; RV32I-NEXT: xori a2, a2, 31
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: .LBB24_6:
+; RV32I-NEXT: slti a1, a7, 0
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: and a1, a1, a6
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: or a0, a5, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotl_64_zext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: sll a1, a0, a1
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotl_64_zext:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: sll a5, a0, a2
+; RV32ZBB-NEXT: addi a3, a2, -32
+; RV32ZBB-NEXT: slti a6, a3, 0
+; RV32ZBB-NEXT: neg a6, a6
+; RV32ZBB-NEXT: bltz a3, .LBB24_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sll a3, a0, a3
+; RV32ZBB-NEXT: j .LBB24_3
+; RV32ZBB-NEXT: .LBB24_2:
+; RV32ZBB-NEXT: sll a3, a1, a2
+; RV32ZBB-NEXT: xori a7, a2, 31
+; RV32ZBB-NEXT: srli t0, a0, 1
+; RV32ZBB-NEXT: srl a7, t0, a7
+; RV32ZBB-NEXT: or a3, a3, a7
+; RV32ZBB-NEXT: .LBB24_3:
+; RV32ZBB-NEXT: and a5, a6, a5
+; RV32ZBB-NEXT: li a6, 32
+; RV32ZBB-NEXT: sub a7, a6, a2
+; RV32ZBB-NEXT: srl a6, a1, a4
+; RV32ZBB-NEXT: bltz a7, .LBB24_5
+; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: mv a0, a6
+; RV32ZBB-NEXT: j .LBB24_6
+; RV32ZBB-NEXT: .LBB24_5:
+; RV32ZBB-NEXT: li t0, 64
+; RV32ZBB-NEXT: sub a2, t0, a2
+; RV32ZBB-NEXT: srl a0, a0, a4
+; RV32ZBB-NEXT: xori a2, a2, 31
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: sll a1, a1, a2
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: .LBB24_6:
+; RV32ZBB-NEXT: slti a1, a7, 0
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: and a1, a1, a6
+; RV32ZBB-NEXT: or a1, a3, a1
+; RV32ZBB-NEXT: or a0, a5, a0
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotl_64_zext:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: rol a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %z = sub i32 64, %y
+ %zext = zext i32 %z to i64
+ %zexty = zext i32 %y to i64
+ %b = shl i64 %x, %zexty
+ %c = lshr i64 %x, %zext
+ %d = or i64 %b, %c
+ ret i64 %d
+}
+
+define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
+; RV32I-LABEL: rotr_64_zext:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: srl a5, a1, a2
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: slti a6, a3, 0
+; RV32I-NEXT: neg a6, a6
+; RV32I-NEXT: bltz a3, .LBB25_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srl a3, a1, a3
+; RV32I-NEXT: j .LBB25_3
+; RV32I-NEXT: .LBB25_2:
+; RV32I-NEXT: srl a3, a0, a2
+; RV32I-NEXT: xori a7, a2, 31
+; RV32I-NEXT: slli t0, a1, 1
+; RV32I-NEXT: sll a7, t0, a7
+; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: and a5, a6, a5
+; RV32I-NEXT: li a6, 32
+; RV32I-NEXT: sub a7, a6, a2
+; RV32I-NEXT: sll a6, a0, a4
+; RV32I-NEXT: bltz a7, .LBB25_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: j .LBB25_6
+; RV32I-NEXT: .LBB25_5:
+; RV32I-NEXT: li t0, 64
+; RV32I-NEXT: sub a2, t0, a2
+; RV32I-NEXT: sll a1, a1, a4
+; RV32I-NEXT: xori a2, a2, 31
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: or a1, a1, a0
+; RV32I-NEXT: .LBB25_6:
+; RV32I-NEXT: slti a0, a7, 0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotr_64_zext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: srl a1, a0, a1
+; RV64I-NEXT: sll a0, a0, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotr_64_zext:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: srl a5, a1, a2
+; RV32ZBB-NEXT: addi a3, a2, -32
+; RV32ZBB-NEXT: slti a6, a3, 0
+; RV32ZBB-NEXT: neg a6, a6
+; RV32ZBB-NEXT: bltz a3, .LBB25_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: srl a3, a1, a3
+; RV32ZBB-NEXT: j .LBB25_3
+; RV32ZBB-NEXT: .LBB25_2:
+; RV32ZBB-NEXT: srl a3, a0, a2
+; RV32ZBB-NEXT: xori a7, a2, 31
+; RV32ZBB-NEXT: slli t0, a1, 1
+; RV32ZBB-NEXT: sll a7, t0, a7
+; RV32ZBB-NEXT: or a3, a3, a7
+; RV32ZBB-NEXT: .LBB25_3:
+; RV32ZBB-NEXT: and a5, a6, a5
+; RV32ZBB-NEXT: li a6, 32
+; RV32ZBB-NEXT: sub a7, a6, a2
+; RV32ZBB-NEXT: sll a6, a0, a4
+; RV32ZBB-NEXT: bltz a7, .LBB25_5
+; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: mv a1, a6
+; RV32ZBB-NEXT: j .LBB25_6
+; RV32ZBB-NEXT: .LBB25_5:
+; RV32ZBB-NEXT: li t0, 64
+; RV32ZBB-NEXT: sub a2, t0, a2
+; RV32ZBB-NEXT: sll a1, a1, a4
+; RV32ZBB-NEXT: xori a2, a2, 31
+; RV32ZBB-NEXT: srli a0, a0, 1
+; RV32ZBB-NEXT: srl a0, a0, a2
+; RV32ZBB-NEXT: or a1, a1, a0
+; RV32ZBB-NEXT: .LBB25_6:
+; RV32ZBB-NEXT: slti a0, a7, 0
+; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: and a0, a0, a6
+; RV32ZBB-NEXT: or a0, a3, a0
+; RV32ZBB-NEXT: or a1, a5, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotr_64_zext:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ror a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %z = sub i32 64, %y
+ %zext = zext i32 %z to i64
+ %zexty = zext i32 %y to i64
+ %b = lshr i64 %x, %zexty
+ %c = shl i64 %x, %zext
+ %d = or i64 %b, %c
+ ret i64 %d
+}