[llvm] d42678b - [RISCV] Add side-effect-free vsetvli intrinsics

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 24 06:02:29 PST 2022


Author: Fraser Cormack
Date: 2022-01-24T13:52:08Z
New Revision: d42678b453bc2587a42eef1ba4e5782b2c8c5ff1

URL: https://github.com/llvm/llvm-project/commit/d42678b453bc2587a42eef1ba4e5782b2c8c5ff1
DIFF: https://github.com/llvm/llvm-project/commit/d42678b453bc2587a42eef1ba4e5782b2c8c5ff1.diff

LOG: [RISCV] Add side-effect-free vsetvli intrinsics

This patch introduces new intrinsics that enable the use of vsetvli in
contexts where only the returned vector length is of interest. The
pre-existing intrinsics are marked as having side effects, which prevents
even trivial optimizations on or across them.

These intrinsics are intended for situations where the vector length is
fed in turn to RVV intrinsics or to vector-predication intrinsics, for
example during loop vectorization. Those codegen paths
ensure that instructions are generated with their own implicit vsetvli,
so the vector length and vtype can be relied upon to be correct.
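
For illustration, a sketch of how the returned vector length might feed a
vector-predication intrinsic (the function, types and values here are
invented for the example; the commit itself adds only the intrinsics and
their tests):

    declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)
    declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)

    define <vscale x 2 x i32> @example(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
      ; SEW=32 (vsew encoding 2), LMUL=1 (vlmul encoding 0).
      %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 2, i64 0)
      ; VP intrinsics take a 32-bit explicit vector length.
      %evl = trunc i64 %vl to i32
      %r = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 %evl)
      ret <vscale x 2 x i32> %r
    }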

No corresponding C builtins are planned at this stage, though that is a
possibility for the future if the need arises.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117910

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsRISCV.td
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
    llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index c332eb82a5906..99dd152fc0fc5 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -159,6 +159,22 @@ let TargetPrefix = "riscv" in {
                                        ImmArg<ArgIndex<0>>,
                                        ImmArg<ArgIndex<1>>]>;
 
+  // Versions without side effects: these optimize better and are usable if
+  // only the returned vector length is important.
+  def int_riscv_vsetvli_opt   : Intrinsic<[llvm_anyint_ty],
+                               /* AVL */  [LLVMMatchType<0>,
+                               /* VSEW */  LLVMMatchType<0>,
+                               /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<1>>,
+                                           ImmArg<ArgIndex<2>>]>;
+  def int_riscv_vsetvlimax_opt : Intrinsic<[llvm_anyint_ty],
+                                /* VSEW */ [LLVMMatchType<0>,
+                                /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<0>>,
+                                           ImmArg<ArgIndex<1>>]>;
+
   // For unit stride load
   // Input: (pointer, vl)
   class RISCVUSLoad

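A note on the signatures above: both intrinsics return and take the XLen
integer type, and the ImmArg properties require the VSEW and VLMUL operands
to be immediates; only the AVL may be a run-time value. A minimal sketch,
assuming the standard SEW/LMUL field encodings used elsewhere in the
backend:

    %vl    = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 3, i64 2) ; e64, m4
    %vlmax = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 1, i64 7)        ; e16, mf2
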
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index e46aa6114fca2..df4e955ef583b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -495,6 +495,75 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
   ReplaceNode(Node, Store);
 }
 
+void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
+  if (!Subtarget->hasVInstructions())
+    return;
+
+  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
+         "Unexpected opcode");
+
+  SDLoc DL(Node);
+  MVT XLenVT = Subtarget->getXLenVT();
+
+  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNoOffset = HasChain ? 1 : 0;
+  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
+
+  assert((IntNo == Intrinsic::riscv_vsetvli ||
+          IntNo == Intrinsic::riscv_vsetvlimax ||
+          IntNo == Intrinsic::riscv_vsetvli_opt ||
+          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
+         "Unexpected vsetvli intrinsic");
+
+  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
+               IntNo == Intrinsic::riscv_vsetvlimax_opt;
+  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
+
+  assert(Node->getNumOperands() == Offset + 2 &&
+         "Unexpected number of operands");
+
+  unsigned SEW =
+      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
+  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
+      Node->getConstantOperandVal(Offset + 1) & 0x7);
+
+  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
+                                            /*MaskAgnostic*/ false);
+  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+  SmallVector<EVT, 2> VTs = {XLenVT};
+  if (HasChain)
+    VTs.push_back(MVT::Other);
+
+  SDValue VLOperand;
+  unsigned Opcode = RISCV::PseudoVSETVLI;
+  if (VLMax) {
+    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+    Opcode = RISCV::PseudoVSETVLIX0;
+  } else {
+    VLOperand = Node->getOperand(IntNoOffset + 1);
+
+    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+      uint64_t AVL = C->getZExtValue();
+      if (isUInt<5>(AVL)) {
+        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
+        if (HasChain)
+          Ops.push_back(Node->getOperand(0));
+        ReplaceNode(
+            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
+        return;
+      }
+    }
+  }
+
+  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
+  if (HasChain)
+    Ops.push_back(Node->getOperand(0));
+
+  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
+}
 
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
@@ -1017,6 +1086,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                                                {Cmp, Mask, VL, MaskSEW}));
       return;
     }
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
+      return selectVSETVLI(Node);
     }
     break;
   }
@@ -1026,54 +1098,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       // By default we do not custom select any intrinsic.
     default:
       break;
-
     case Intrinsic::riscv_vsetvli:
-    case Intrinsic::riscv_vsetvlimax: {
-      if (!Subtarget->hasVInstructions())
-        break;
-
-      bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
-      unsigned Offset = VLMax ? 2 : 3;
-
-      assert(Node->getNumOperands() == Offset + 2 &&
-             "Unexpected number of operands");
-
-      unsigned SEW =
-          RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
-      RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
-          Node->getConstantOperandVal(Offset + 1) & 0x7);
-
-      unsigned VTypeI = RISCVVType::encodeVTYPE(
-          VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
-      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
-
-      SDValue VLOperand;
-      unsigned Opcode = RISCV::PseudoVSETVLI;
-      if (VLMax) {
-        VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
-        Opcode = RISCV::PseudoVSETVLIX0;
-      } else {
-        VLOperand = Node->getOperand(2);
-
-        if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
-          uint64_t AVL = C->getZExtValue();
-          if (isUInt<5>(AVL)) {
-            SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
-            ReplaceNode(
-                Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
-                                             MVT::Other, VLImm, VTypeIOp,
-                                             /* Chain */ Node->getOperand(0)));
-            return;
-          }
-        }
-      }
-
-      ReplaceNode(Node,
-                  CurDAG->getMachineNode(Opcode, DL, XLenVT,
-                                         MVT::Other, VLOperand, VTypeIOp,
-                                         /* Chain */ Node->getOperand(0)));
-      return;
-    }
+    case Intrinsic::riscv_vsetvlimax:
+      return selectVSETVLI(Node);
     case Intrinsic::riscv_vlseg2:
     case Intrinsic::riscv_vlseg3:
     case Intrinsic::riscv_vlseg4:

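The new selectVSETVLI helper preserves the three existing selection paths
while also handling the chain-free intrinsics. Roughly, in the style of the
updated tests (expected assembly shown as comments; exact registers depend
on allocation):

    ; AVL in a register -> PseudoVSETVLI:
    ;     vsetvli a0, a0, e32, m2, ta, mu
    %a = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 2, i64 1)

    ; Constant AVL that fits in 5 bits -> PseudoVSETIVLI:
    ;     vsetivli a0, 16, e32, m2, ta, mu
    %b = call i64 @llvm.riscv.vsetvli.opt.i64(i64 16, i64 2, i64 1)

    ; vsetvlimax variants -> PseudoVSETVLIX0, with X0 as the AVL operand:
    ;     vsetvli a0, zero, e32, m2, ta, mu
    %c = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
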
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index a2770089995d8..f4d6fdddca390 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -87,6 +87,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
   void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
 
+  void selectVSETVLI(SDNode *Node);
+
   // Return the RISC-V condition code that matches the given DAG integer
   // condition code. The CondCode must be one of those supported by the RISC-V
   // ISA (see translateSetCCForBranch).

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d224e3968545..f7e4e36a20d15 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8162,14 +8162,18 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     // We assume VLENB is no more than 65536 / 8 bytes.
     Known.Zero.setBitsFrom(14);
     break;
-  case ISD::INTRINSIC_W_CHAIN: {
-    unsigned IntNo = Op.getConstantOperandVal(1);
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo =
+        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
     switch (IntNo) {
     default:
       // We can't do anything for most intrinsics.
       break;
     case Intrinsic::riscv_vsetvli:
     case Intrinsic::riscv_vsetvlimax:
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
       // Assume that VL output is positive and would fit in an int32_t.
       // TODO: VLEN might be capped at 16 bits in a future V spec update.
       if (BitWidth >= 32)

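Extending computeKnownBitsForTargetNode to the chain-free intrinsics lets
the DAG combiner fold away redundant masking or extension of the returned
VL. A hypothetical example (assuming the upper bits are reported as known
zero, per the comment above):

    %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
    ; Foldable to %vl: the result is known to fit in 32 bits.
    %masked = and i64 %vl, 4294967295
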
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index 081743b31b701..1c3c219c13041 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
 declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32)
+declare i32 @llvm.riscv.vsetvli.opt.i32(i32, i32, i32)
+declare i32 @llvm.riscv.vsetvlimax.opt.i32(i32, i32)
 
 define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e64mf8:
@@ -31,6 +33,68 @@ define void @test_vsetvlimax_e64m8() nounwind {
   ret void
 }
 
+define i32 @test_vsetvli_opt_e8m1(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret i32 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret void
+}
+
+define i32 @test_vsetvli_opt_e16mf4(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 0, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret i32 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret void
+}
+
+define i32 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 3, i32 2)
+  ret i32 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index 2b745cb5eddaa..26c3aeeba38fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
 declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
+declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)
+declare i64 @llvm.riscv.vsetvlimax.opt.i64(i64, i64)
 
 define void @test_vsetvli_e8m1(i64 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e8m1:
@@ -49,6 +51,68 @@ define void @test_vsetvlimax_e64m4() nounwind {
   ret void
 }
 
+define i64 @test_vsetvli_opt_e8m1(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret i64 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret void
+}
+
+define i64 @test_vsetvli_opt_e16mf4(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 0, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret i64 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret void
+}
+
+define i64 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 3, i64 2)
+  ret i64 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation

