[llvm] 0816b3e - [RISCV] Check floating point vector instruction with SEW=64 is valid when vsetvl insertion
Kito Cheng via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 17 19:31:07 PDT 2023
Author: Kito Cheng
Date: 2023-08-18T10:31:01+08:00
New Revision: 0816b3efbfaaf958a3f2e842aa3eacd525e7ae12
URL: https://github.com/llvm/llvm-project/commit/0816b3efbfaaf958a3f2e842aa3eacd525e7ae12
DIFF: https://github.com/llvm/llvm-project/commit/0816b3efbfaaf958a3f2e842aa3eacd525e7ae12.diff
LOG: [RISCV] Check floating point vector instruction with SEW=64 is valid when vsetvl insertion
Scalar move and splat instructions only demand that the SEW is greater than
or equal to their own needs, but a floating point vector with SEW=64 is not
always valid even when SEW=64 itself is valid, because we have a special
configuration: zve64f.
So we need to check that a floating point vector instruction with SEW=64 is
valid when computing the demand of floating point scalar move and splat
instructions.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D158086
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 58270e839e2e14..bd99f0cc29aaa5 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -75,6 +75,16 @@ static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
return RVV->BaseInstr;
}
+static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
+ switch (getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VFMV_S_F:
+ case RISCV::VFMV_V_F:
+ return true;
+ }
+}
+
static bool isScalarExtractInstr(const MachineInstr &MI) {
switch (getRVVMCOpcode(MI.getOpcode())) {
default:
@@ -321,6 +331,8 @@ DemandedFields getDemanded(const MachineInstr &MI,
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
// be stale after lowering.
+ bool HasVInstructionsF64 =
+ MI.getMF()->getSubtarget<RISCVSubtarget>().hasVInstructionsF64();
// Most instructions don't use any of these subfeilds.
DemandedFields Res;
@@ -379,7 +391,8 @@ DemandedFields getDemanded(const MachineInstr &MI,
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
if (hasUndefinedMergeOp(MI, *MRI)) {
- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ if (!isFloatScalarMoveOrScalarSplatInstr(MI) || HasVInstructionsF64)
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
Res.TailPolicy = false;
}
}
@@ -935,6 +948,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
return true;
DemandedFields Used = getDemanded(MI, MRI);
+ bool HasVInstructionsF64 =
+ MI.getMF()->getSubtarget<RISCVSubtarget>().hasVInstructionsF64();
// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
@@ -962,7 +977,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
Used.LMUL = false;
Used.SEWLMULRatio = false;
Used.VLAny = false;
- Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ if (!isFloatScalarMoveOrScalarSplatInstr(MI) || HasVInstructionsF64)
+ Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
Used.TailPolicy = false;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
index a4fa6b687c28db..bb877b3b213da1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
@@ -9,6 +9,7 @@ define void @foo(half %y, ptr %i64p) {
; CHECK-NO-FELEN64: # %bb.0: # %entry
; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NO-FELEN64-NEXT: vle64.v v8, (a0)
+; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NO-FELEN64-NEXT: vfmv.s.f v9, fa0
; CHECK-NO-FELEN64-NEXT: #APP
; CHECK-NO-FELEN64-NEXT: # use v8 v9
@@ -39,6 +40,7 @@ define void @bar(half %y, ptr %i32p) {
; CHECK-NO-FELEN64: # %bb.0: # %entry
; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NO-FELEN64-NEXT: vle32.v v8, (a0)
+; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NO-FELEN64-NEXT: vfmv.s.f v9, fa0
; CHECK-NO-FELEN64-NEXT: #APP
; CHECK-NO-FELEN64-NEXT: # use v8 v9
More information about the llvm-commits
mailing list