[llvm] fc9b264 - [RISCV][InsertVSETVLI] Treat vmv.v.i as-if it were vmv.s.x when VL=1, and inactive lanes are undefined

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 15 14:10:11 PDT 2023


Author: Philip Reames
Date: 2023-06-15T14:10:04-07:00
New Revision: fc9b26440d4441a0b3bcb9202debe259b57f4692

URL: https://github.com/llvm/llvm-project/commit/fc9b26440d4441a0b3bcb9202debe259b57f4692
DIFF: https://github.com/llvm/llvm-project/commit/fc9b26440d4441a0b3bcb9202debe259b57f4692.diff

LOG: [RISCV][InsertVSETVLI] Treat vmv.v.i as-if it were vmv.s.x when VL=1, and inactive lanes are undefined

A vmv.v.i/x splats its immediate (or scalar) operand to all active lanes. With VL=1, the only active lane is element 0, so the active-lane result matches that of vmv.s.x, which writes a single scalar into the low lane. If we can ignore all the inactive lanes (because they are known to be undefined), the two are semantically equivalent. We already reason about compatible VL/VTYPE combinations for vmv.s.x; apply the same logic to vmv.v.i.
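
As an illustrative sketch (not taken verbatim from the patch): with an undefined merge operand, the sequence

    vsetivli zero, 1, e64, m1, ta, ma
    vmv.v.i  v9, -1

defines only element 0 of v9, with every remaining lane left as tail. That is exactly the lane a vmv.s.x writing -1 would define, so any VL/VTYPE state that is compatible for the vmv.s.x form is also compatible here.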

Unlike with vmv.s.x, we do need to be careful not to increase LMUL. A splat instruction is probably linear in LMUL, so restrict this to LMUL1 or smaller.
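
The effect shows up in the test updates below; for example, in vreduce_and_v2i64 the vsetvli toggle around the splat becomes unnecessary (a rough before/after sketch of that diff):

    # before
    vsetivli   zero, 2, e64, m1, ta, ma
    vle64.v    v8, (a0)
    vsetivli   zero, 1, e64, m1, ta, ma
    vmv.v.i    v9, -1
    vsetivli   zero, 2, e64, m1, ta, ma
    vredand.vs v8, v8, v9

    # after
    vsetivli   zero, 2, e64, m1, ta, ma
    vle64.v    v8, (a0)
    vmv.v.i    v9, -1
    vredand.vs v8, v8, v9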

Differential Revision: https://reviews.llvm.org/D152845

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6d862f83d93d8..7a3e313747942 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -85,6 +85,17 @@ static bool isScalarMoveInstr(const MachineInstr &MI) {
   }
 }
 
+static bool isScalarSplatInstr(const MachineInstr &MI) {
+  switch (getRVVMCOpcode(MI.getOpcode())) {
+  default:
+    return false;
+  case RISCV::VMV_V_I:
+  case RISCV::VMV_V_X:
+  case RISCV::VFMV_V_F:
+    return true;
+  }
+}
+
 static bool isVSlideInstr(const MachineInstr &MI) {
   switch (getRVVMCOpcode(MI.getOpcode())) {
   default:
@@ -911,6 +922,20 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
     Used.TailPolicy = false;
   }
 
+  // A tail-undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
+  // semantically as vmv.s.x.  This is particularly useful since we don't have
+  // an immediate form of vmv.s.x, and thus often use vmv.v.i in its place.
+  // Since a splat is non-constant time in LMUL, we do need to be careful not
+  // to increase the number of active vector registers (unlike for vmv.s.x).
+  if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
+      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
+    Used.LMUL = false;
+    Used.SEWLMULRatio = false;
+    Used.VLAny = false;
+    Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
+    Used.TailPolicy = false;
+  }
+
   if (CurInfo.isCompatible(Used, Require, *MRI))
     return false;
 

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 908026a229380..2857d45242dfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -1431,8 +1431,8 @@ define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl)
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vmsne.vi v9, v9, 0
 ; RV32-NEXT:    vmand.mm v0, v9, v0
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vmv.v.i v9, 1
+; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    mv a1, a2
@@ -1452,8 +1452,8 @@ define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl)
 ; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vmsne.vi v9, v9, 0
 ; RV64-NEXT:    vmand.mm v0, v9, v0
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vmv.v.i v9, 1
+; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    mv a1, a2

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 22b7841e9924c..ccf7f7665a90e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -2199,9 +2199,7 @@ define i64 @vreduce_and_v2i64(ptr %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v9, -1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vredand.vs v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    li a1, 32
@@ -5686,9 +5684,7 @@ define i64 @vreduce_umin_v2i64(ptr %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v9, -1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vredminu.vs v8, v8, v9
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    li a1, 32


        

