[llvm] 460c1bd - [RISCV][InsertVSETVLI] Rewrite scalar insert forward rule in terms of demanded fields

Tue Jan 3 14:20:58 PST 2023

Author: Philip Reames
Date: 2023-01-03T14:19:52-08:00
New Revision: 460c1bd3440ebf5a601f643e992e3a0ad3471167

URL: https://github.com/llvm/llvm-project/commit/460c1bd3440ebf5a601f643e992e3a0ad3471167
DIFF: https://github.com/llvm/llvm-project/commit/460c1bd3440ebf5a601f643e992e3a0ad3471167.diff

LOG: [RISCV][InsertVSETVLI] Rewrite scalar insert forward rule in terms of demanded fields

This is mostly geared at consolidating logic into one form to reduce code duplication, but also has the effect of being a slight generalization. Since these operations aren't masked, we can ignore the mask policy bit when deciding on compatibility. The previous code was overly strict in checking that both policy bits matched.

Note: There's a slight difference from the reviewed version.  The reviewed version was based on a local revision which included the isCompatible change to only check AVL if VL is used.  I apparently never landed that change, and while functional, the functional change isn't visible without this one.  I chose to role the extra change into this patch.

Differential Revision: https://reviews.llvm.org/D140147

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 438e8ee619c85..401c24b5d4be6 100644

--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -261,9 +261,10 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
   }
 
   // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0.
-  // As such, the result does not depend on LMUL.
-  if (isScalarMoveInstr(MI))
+  if (isScalarMoveInstr(MI)) {
     Res.LMUL = false;
+    Res.SEWLMULRatio = false;
+  }
 
   return Res;
 }
@@ -378,16 +379,6 @@ class VSETVLIInfo {
 
   bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
 
-  bool hasSameSEW(const VSETVLIInfo &Other) const {
-    assert(isValid() && Other.isValid() &&
-           "Can't compare invalid VSETVLIInfos");
-    assert(!isUnknown() && !Other.isUnknown() &&
-           "Can't compare VTYPE in unknown state");
-    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
-           "Can't compare when only LMUL/SEW ratio is valid.");
-    return SEW == Other.SEW;
-  }
-
   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
     assert(isValid() && Other.isValid() &&
            "Can't compare invalid VSETVLIInfos");
@@ -418,15 +409,6 @@ class VSETVLIInfo {
     return getSEWLMULRatio() == Other.getSEWLMULRatio();
   }
 
-  bool hasSamePolicy(const VSETVLIInfo &Other) const {
-    assert(isValid() && Other.isValid() &&
-           "Can't compare invalid VSETVLIInfos");
-    assert(!isUnknown() && !Other.isUnknown() &&
-           "Can't compare VTYPE in unknown state");
-    return TailAgnostic == Other.TailAgnostic &&
-           MaskAgnostic == Other.MaskAgnostic;
-  }
-
   bool hasCompatibleVTYPE(const DemandedFields &Used,
                           const VSETVLIInfo &Require) const {
     return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
@@ -454,8 +436,7 @@ class VSETVLIInfo {
       if (SEW == Require.SEW)
         return true;
 
-    // TODO: Check Used.VL here
-    if (!hasSameAVL(Require))
+    if (Used.VL && !hasSameAVL(Require))
       return false;
     return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
   }
@@ -804,23 +785,28 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
     return true;
 
-  const DemandedFields Used = getDemanded(MI);
-  if (CurInfo.isCompatible(Used, Require))
-    return false;
+  DemandedFields Used = getDemanded(MI);
 
   // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0.
-  // Additionally, if writing to an implicit_def operand, we don't need to
-  // preserve any other bits and are thus compatible with any larger etype,
-  // and can disregard policy bits.
   if (isScalarMoveInstr(MI) && CurInfo.hasEquallyZeroAVL(Require)) {
+    Used.VL = false;
+    // Additionally, if writing to an implicit_def operand, we don't need to
+    // preserve any other bits and are thus compatible with any larger etype,
+    // and can disregard policy bits.  Warning: It's tempting to try doing
+    // this for any tail agnostic operation, but we can't as TA requires
+    // tail lanes to either be the original value or -1.  We are writing
+    // unknown bits to the lanes here.
     auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
     if (VRegDef && VRegDef->isImplicitDef() &&
-        CurInfo.getSEW() >= Require.getSEW())
-      return false;
-    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
-      return false;
+        CurInfo.getSEW() >= Require.getSEW()) {
+      Used.SEW = false;
+      Used.TailPolicy = false;
+    }
   }
 
+  if (CurInfo.isCompatible(Used, Require))
+    return false;
+
   // We didn't find a compatible value. If our AVL is a virtual register,
   // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
   // and the last VL/VTYPE we observed is the same, we don't need a

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 357d0cbdd478c..4c82f5d32ecc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -181,7 +181,6 @@ define <vscale x 1 x i64> @test9(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 9, e64, m1, tu, mu
 ; CHECK-NEXT:    vadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
 entry:
@@ -227,7 +226,6 @@ define <vscale x 1 x double> @test12(<vscale x 1 x double> %a, double %b, <vscal
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 9, e64, m1, tu, mu
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry: