[llvm] af9b25f - [RISCV] Optimize floating point scalar move and splat

Wed Sep 6 01:39:37 PDT 2023

Author: Kito Cheng
Date: 2023-09-06T16:39:30+08:00
New Revision: af9b25f9dbc261c3da42660181da3975e3a85d60

URL: https://github.com/llvm/llvm-project/commit/af9b25f9dbc261c3da42660181da3975e3a85d60
DIFF: https://github.com/llvm/llvm-project/commit/af9b25f9dbc261c3da42660181da3975e3a85d60.diff

LOG: [RISCV] Optimize floating point scalar move and splat

In D158086, we prevented all floating point scalar moves and splats from
fusing their vsetvli with one that has a different SEW. This patch tries to
relax that constraint as much as possible by introducing a new SEW demand
type, SEWGreaterThanOrEqualAndLessThan64, which allows the SEW to be fused
with a larger SEW, but prevents it from being fused with SEW=64.

Reviewed By: rogfer01

Differential Revision: https://reviews.llvm.org/D158177

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index c29abe8b7eed84..b42ad269c18de6 100644

--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -212,10 +212,14 @@ struct DemandedFields {
   bool VLZeroness = false;
   // What properties of SEW we need to preserve.
   enum : uint8_t {
-    SEWEqual = 2,              // The exact value of SEW needs to be preserved.
-    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
+    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
+    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                                // than or equal to the original value.
-    SEWNone = 0                // We don't need to preserve SEW at all.
+    SEWGreaterThanOrEqualAndLessThan64 =
+        1,      // SEW can be changed as long as it's greater
+                // than or equal to the original value, but must be less
+                // than 64.
+    SEWNone = 0 // We don't need to preserve SEW at all.
   } SEW = SEWNone;
   bool LMUL = false;
   bool SEWLMULRatio = false;
@@ -267,6 +271,9 @@ struct DemandedFields {
     case SEWGreaterThanOrEqual:
       OS << "SEWGreaterThanOrEqual";
       break;
+    case SEWGreaterThanOrEqualAndLessThan64:
+      OS << "SEWGreaterThanOrEqualAndLessThan64";
+      break;
     case SEWNone:
       OS << "SEWNone";
       break;
@@ -302,6 +309,11 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
       RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
     return false;
 
+  if (Used.SEW == DemandedFields::SEWGreaterThanOrEqualAndLessThan64 &&
+      (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
+       RISCVVType::getSEW(NewVType) >= 64))
+    return false;
+
   if (Used.LMUL &&
       RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
     return false;
@@ -391,7 +403,9 @@ DemandedFields getDemanded(const MachineInstr &MI,
     // tail lanes to either be the original value or -1.  We are writing
     // unknown bits to the lanes here.
     if (hasUndefinedMergeOp(MI, *MRI)) {
-      if (!isFloatScalarMoveOrScalarSplatInstr(MI) || HasVInstructionsF64)
+      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !HasVInstructionsF64)
+        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+      else
         Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
       Res.TailPolicy = false;
     }
@@ -974,7 +988,9 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
     Used.LMUL = false;
     Used.SEWLMULRatio = false;
     Used.VLAny = false;
-    if (!isFloatScalarMoveOrScalarSplatInstr(MI) || HasVInstructionsF64)
+    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !HasVInstructionsF64)
+      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+    else
       Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
     Used.TailPolicy = false;
   }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
index bb877b3b213da1..7bae1160a8a5fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll
@@ -40,7 +40,6 @@ define void @bar(half %y, ptr %i32p) {
 ; CHECK-NO-FELEN64:       # %bb.0: # %entry
 ; CHECK-NO-FELEN64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NO-FELEN64-NEXT:    vle32.v v8, (a0)
-; CHECK-NO-FELEN64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NO-FELEN64-NEXT:    vfmv.s.f v9, fa0
 ; CHECK-NO-FELEN64-NEXT:    #APP
 ; CHECK-NO-FELEN64-NEXT:    # use v8 v9