[llvm] bbd6a2d - [RISCV] Convert implicit_def tuples to noreg in post-isel peephole (#91173)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 8 00:38:17 PDT 2024
Author: Luke Lau
Date: 2024-05-08T15:38:13+08:00
New Revision: bbd6a2d85c44d99e66b471d251a742f7551a0c61
URL: https://github.com/llvm/llvm-project/commit/bbd6a2d85c44d99e66b471d251a742f7551a0c61
DIFF: https://github.com/llvm/llvm-project/commit/bbd6a2d85c44d99e66b471d251a742f7551a0c61.diff
LOG: [RISCV] Convert implicit_def tuples to noreg in post-isel peephole (#91173)
If a segmented load has an undefined passthru, it is selected as a
REG_SEQUENCE whose operands are all IMPLICIT_DEFs, which currently slips
through the implicit_def -> noreg peephole.
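For illustration, this is the shape of the pattern that was being missed
(a condensed sketch adapted from the vleff-vlseg2ff-output.ll test updated
below; trailing operands are elided):

  %0:vr = IMPLICIT_DEF
  %1:vr = IMPLICIT_DEF
  %2:vrn2m1 = REG_SEQUENCE %0, %subreg.sub_vrm1_0, %1, %subreg.sub_vrm1_1
  %3:vrn2m1, %4:gpr = PseudoVLSEG2E8FF_V_M1 %2, ...

The tuple passthru %2 is entirely undefined, but because it is a
REG_SEQUENCE rather than a bare IMPLICIT_DEF the old peephole left it alone.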
This patch fixes that, so we can infer whether the passthru is undefined
without looking through vreg definitions with MachineRegisterInfo, which
will help with moving RISCVInsertVSETVLI to LiveIntervals in #70549.
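With the peephole extended, the tuple passthru collapses to $noreg, so a
consumer like hasUndefinedMergeOp only has to inspect the operand itself
(again condensed from the vleff-vlseg2ff-output.ll hunk below, operands
elided):

  %3:vrn2m1, %4:gpr = PseudoVLSEG2E8FF_V_M1 $noreg, ...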
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index dc3ad5ac5908c..e73a3af92af6f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3478,8 +3478,15 @@ static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
}
static bool isImplicitDef(SDValue V) {
- return V.isMachineOpcode() &&
- V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
+ if (!V.isMachineOpcode())
+ return false;
+ if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
+ for (unsigned I = 1; I < V.getNumOperands(); I += 2)
+ if (!isImplicitDef(V.getOperand(I)))
+ return false;
+ return true;
+ }
+ return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 06456f97f5ebc..5f8b610e52338 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -173,8 +173,7 @@ static bool isMaskRegOp(const MachineInstr &MI) {
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
-static bool hasUndefinedMergeOp(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+static bool hasUndefinedMergeOp(const MachineInstr &MI) {
unsigned UseOpIdx;
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
@@ -182,35 +181,10 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
// lanes are undefined.
return true;
- // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
- // operands are solely IMPLICIT_DEFS, then the pass through lanes are
- // undefined.
+ // All undefined passthrus should be $noreg: see
+ // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- if (UseMO.getReg() == RISCV::NoRegister)
- return true;
-
- if (UseMO.isUndef())
- return true;
- if (UseMO.getReg().isPhysical())
- return false;
-
- MachineInstr *UseMI = MRI.getUniqueVRegDef(UseMO.getReg());
- assert(UseMI);
- if (UseMI->isImplicitDef())
- return true;
-
- if (UseMI->isRegSequence()) {
- for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
- MachineInstr *SourceMI =
- MRI.getUniqueVRegDef(UseMI->getOperand(i).getReg());
- assert(SourceMI);
- if (!SourceMI->isImplicitDef())
- return false;
- }
- return true;
- }
-
- return false;
+ return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}
/// Which subfields of VL or VTYPE have values we need to preserve?
@@ -429,7 +403,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
// this for any tail agnostic operation, but we can't as TA requires
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
- if (hasUndefinedMergeOp(MI, *MRI)) {
+ if (hasUndefinedMergeOp(MI)) {
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
else
@@ -913,7 +887,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
bool TailAgnostic = true;
bool MaskAgnostic = true;
- if (!hasUndefinedMergeOp(MI, *MRI)) {
+ if (!hasUndefinedMergeOp(MI)) {
// Start with undisturbed.
TailAgnostic = false;
MaskAgnostic = false;
@@ -1109,7 +1083,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// * The LMUL1 restriction is for machines whose latency may depend on VL.
// * As above, this is only legal for tail "undefined" not "agnostic".
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
- isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
+ isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
Used.VLAny = false;
Used.VLZeroness = true;
Used.LMUL = false;
@@ -1121,8 +1095,9 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
// Since a splat is non-constant time in LMUL, we do need to be careful to not
// increase the number of active vector registers (unlike for vmv.s.x.)
- if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
- isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
+ if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
+ Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
+ hasUndefinedMergeOp(MI)) {
Used.LMUL = false;
Used.SEWLMULRatio = false;
Used.VLAny = false;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
index 407c782d3377a..e7913fc53df0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -13,13 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # implicit-def: $v10
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
-; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -95,13 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # implicit-def: $v10
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
-; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -177,13 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # implicit-def: $v12m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2
-; SPILL-O0-NEXT: vmv2r.v v10, v12
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -262,13 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 2
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8m4
-; SPILL-O0-NEXT: # implicit-def: $v12m4
-; SPILL-O0-NEXT: # implicit-def: $v16m4
-; SPILL-O0-NEXT: # implicit-def: $v12m4
-; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
-; SPILL-O0-NEXT: vmv4r.v v12, v16
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -347,16 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # implicit-def: $v16m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # implicit-def: $v14m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2
-; SPILL-O0-NEXT: vmv2r.v v10, v16
-; SPILL-O0-NEXT: vmv2r.v v12, v14
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
index 1c1544b4efa0b..dd575b3fceb55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -13,13 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # implicit-def: $v10
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
-; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -95,13 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # implicit-def: $v10
-; SPILL-O0-NEXT: # implicit-def: $v9
-; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
-; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -177,13 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # implicit-def: $v12m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2
-; SPILL-O0-NEXT: vmv2r.v v10, v12
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -262,13 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 2
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8m4
-; SPILL-O0-NEXT: # implicit-def: $v12m4
-; SPILL-O0-NEXT: # implicit-def: $v16m4
-; SPILL-O0-NEXT: # implicit-def: $v12m4
-; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
-; SPILL-O0-NEXT: vmv4r.v v12, v16
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -347,16 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: # implicit-def: $v8m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # implicit-def: $v16m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # implicit-def: $v14m2
-; SPILL-O0-NEXT: # implicit-def: $v10m2
-; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2
-; SPILL-O0-NEXT: vmv2r.v v10, v16
-; SPILL-O0-NEXT: vmv2r.v v12, v14
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
index 15cb42bacf173..390647fd9e6c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
@@ -66,12 +66,7 @@ define i64 @test_vlseg2ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vrn2m1 = REG_SEQUENCE [[DEF]], %subreg.sub_vrm1_0, [[DEF2]], %subreg.sub_vrm1_1
- ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[REG_SEQUENCE]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
+ ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]]
; CHECK-NEXT: PseudoRET implicit $x10
entry: