[llvm] b2e9684 - [RISCV] isel (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C).
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 2 01:11:35 PDT 2022
Author: Craig Topper
Date: 2022-07-02T01:04:44-07:00
New Revision: b2e9684fe4d10dbdd7679a29c6f971dc59ede3dd
URL: https://github.com/llvm/llvm-project/commit/b2e9684fe4d10dbdd7679a29c6f971dc59ede3dd
DIFF: https://github.com/llvm/llvm-project/commit/b2e9684fe4d10dbdd7679a29c6f971dc59ede3dd.diff
LOG: [RISCV] isel (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C).
where C2 is a shifted mask with 32 leading zeros and C3 trailing zeros.

When the shl is used by an add and C is 1, 2, or 3, we end up matching
(add (shl X, C), Y) first. This leaves behind an and with a constant
that is harder to materialize.
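
For intuition on why the rewrite is sound on RV64, here is a minimal standalone
sanity check (not part of the commit). The helpers below model the RV64 SRLIW
and SLLI semantics, and the constants C2 = 0xFFFFFFFE, C3 = 1, C = 3 are
example values chosen to mirror the srliw_1_sh3add test added here; they are
illustrative assumptions, not code from this patch.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // RV64 SRLIW: logical right shift of the low 32 bits, then sign-extend to 64.
    static uint64_t srliw(uint64_t x, unsigned sh) {
      uint32_t lo = static_cast<uint32_t>(x) >> sh;
      return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(lo)));
    }

    // RV64 SLLI: plain 64-bit left shift by an immediate.
    static uint64_t slli(uint64_t x, unsigned sh) { return x << sh; }

    int main() {
      // Example mask: bits 1..31 set, i.e. 32 leading zeros and 1 trailing zero.
      const uint64_t C2 = 0xFFFFFFFEu;
      const unsigned C3 = 1; // trailing zeros of C2
      const unsigned C = 3;  // shl amount (the sh3add case from the new tests)

      const uint64_t inputs[] = {0x0, 0x1, 0xFFFFFFFF, 0x1234567890ABCDEFULL,
                                 0xFFFFFFFFFFFFFFFFULL};
      for (uint64_t X : inputs) {
        uint64_t before = (X & C2) << C;             // (shl (and X, C2), C)
        uint64_t after = slli(srliw(X, C3), C3 + C); // (slli (srliw X, C3), C3+C)
        assert(before == after);
      }
      std::puts("rewrite holds on the sample inputs");
      return 0;
    }

Because C2's highest set bit is bit 31 and C3 > 0, the 32-bit value produced by
srliw has bit 31 clear, so the sign extension contributes only zeros and the
final slli places the surviving bits exactly where the original and+shl did.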
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/test/CodeGen/RISCV/rv64zba.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b0651701460d..f7161e92955c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -728,6 +728,36 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, ADDI);
return;
}
+ case ISD::SHL: {
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (TrailingZeros > 0 && LeadingZeros == 32) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+ break;
+ }
case ISD::SRL: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 659ba921d44e..c6af0b7ed7cb 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1309,3 +1309,78 @@ define i64 @srliw_3_sh3add(i64* %0, i32 signext %1) {
%6 = load i64, i64* %5, align 8
ret i64 %6
}
+
+; FIXME: This should select sh2add with Zba.
+define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh2add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 1
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srliw_1_sh2add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: li a2, -2
+; RV64ZBA-NEXT: zext.w a2, a2
+; RV64ZBA-NEXT: and a1, a1, a2
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: lw a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = lshr i32 %1, 1
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i32, i32* %0, i64 %4
+ %6 = load i32, i32* %5, align 4
+ ret i32 %6
+}
+
+; FIXME: This should select sh3add with Zba.
+define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 1
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srliw_1_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: li a2, -2
+; RV64ZBA-NEXT: zext.w a2, a2
+; RV64ZBA-NEXT: and a1, a1, a2
+; RV64ZBA-NEXT: sh2add a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = lshr i32 %1, 1
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i64, i64* %0, i64 %4
+ %6 = load i64, i64* %5, align 8
+ ret i64 %6
+}
+
+; FIXME: This should select sh3add with Zba.
+define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 2
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srliw_2_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: li a2, -4
+; RV64ZBA-NEXT: zext.w a2, a2
+; RV64ZBA-NEXT: and a1, a1, a2
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = lshr i32 %1, 2
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i64, i64* %0, i64 %4
+ %6 = load i64, i64* %5, align 8
+ ret i64 %6
+}