[llvm] 3d17c91 - [RISCV] Fix missing vsetvli in transparent block case

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Mon May 16 17:06:43 PDT 2022


Author: Philip Reames
Date: 2022-05-16T17:06:27-07:00
New Revision: 3d17c917099a691e12f05d2502d81b972600a4ae

URL: https://github.com/llvm/llvm-project/commit/3d17c917099a691e12f05d2502d81b972600a4ae
DIFF: https://github.com/llvm/llvm-project/commit/3d17c917099a691e12f05d2502d81b972600a4ae.diff

LOG: [RISCV] Fix missing vsetvli in transparent block case

We've got a lurking problem with our data flow implementation where different phases disagree, resulting in possible miscompiles. D119518 introduced a workaround, but it failed to consider blocks which contain only loads/stores that are compatible with their incoming state.

When I went to rebase and simplify D125232, it turned out that not all of the correctness issues had been fixed after all. This is the correctness fix that was accidentally embedded in the original, more complicated version of that patch.

Note that the test changes here are mostly regressions. However, the simplified version of D125232 exactly reverses all of the non-functional test diffs introduced here. D125232 should be the immediately following commit.

Differential Revision: https://reviews.llvm.org/D125703

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5f3e3353600e7..d7d7cc79edb49 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1122,9 +1122,9 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
       if (!CurInfo.isValid()) {
         // We haven't found any vector instructions or VL/VTYPE changes yet,
         // use the predecessor information.
-        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
-               "Expected a valid predecessor state.");
-        if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
+        CurInfo = BlockInfo[MBB.getNumber()].Pred;
+        assert(CurInfo.isValid() && "Expected a valid predecessor state.");
+        if (needVSETVLI(NewInfo, CurInfo)) {
           // If this is the first implicit state change, and the state change
           // requested can be proven to produce the same register contents, we
           // can skip emitting the actual state change and continue as if we
@@ -1133,7 +1133,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
           // we *do* need to model the state as if it changed as while the
           // register contents are unchanged, the abstract model can change.
           if (needVSETVLIPHI(NewInfo, MBB))
-            insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
+            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
           CurInfo = NewInfo;
         }
       } else {

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index bdaa993435589..5d8a855701741 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -23,13 +23,15 @@ declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>*
 define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    beqz a1, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: # %if.else
 ; CHECK-NEXT:    vfsub.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
@@ -54,14 +56,15 @@ if.end:                                           ; preds = %if.else, %if.then
 define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    beqz a1, .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v9, v8, v9
-; CHECK-NEXT:    vfmul.vv v8, v9, v8
-; CHECK-NEXT:    ret
+; CHECK-NEXT:    j .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: # %if.else
 ; CHECK-NEXT:    vfsub.vv v9, v8, v9
+; CHECK-NEXT:  .LBB1_3: # %if.end
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vv v8, v9, v8
 ; CHECK-NEXT:    ret
 entry:
@@ -180,22 +183,23 @@ define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi a2, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    bnez a2, .LBB4_3
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT:    bnez a2, .LBB4_2
 ; CHECK-NEXT:  # %bb.1: # %if.else
 ; CHECK-NEXT:    vfsub.vv v9, v8, v9
-; CHECK-NEXT:    andi a0, a1, 2
-; CHECK-NEXT:    beqz a0, .LBB4_4
-; CHECK-NEXT:  .LBB4_2: # %if.then4
-; CHECK-NEXT:    vfmul.vv v8, v9, v8
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB4_3: # %if.then
+; CHECK-NEXT:    j .LBB4_3
+; CHECK-NEXT:  .LBB4_2: # %if.then
 ; CHECK-NEXT:    vfadd.vv v9, v8, v9
+; CHECK-NEXT:  .LBB4_3: # %if.end
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    andi a0, a1, 2
-; CHECK-NEXT:    bnez a0, .LBB4_2
-; CHECK-NEXT:  .LBB4_4: # %if.else5
+; CHECK-NEXT:    bnez a0, .LBB4_5
+; CHECK-NEXT:  # %bb.4: # %if.else5
 ; CHECK-NEXT:    vfmul.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_5: # %if.then4
+; CHECK-NEXT:    vfmul.vv v8, v9, v8
+; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
   %conv = zext i8 %cond to i32
@@ -238,29 +242,17 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi a3, a1, 1
 ; CHECK-NEXT:    vsetvli a2, a0, e64, m1, ta, mu
-; CHECK-NEXT:    bnez a3, .LBB5_3
+; CHECK-NEXT:    bnez a3, .LBB5_2
 ; CHECK-NEXT:  # %bb.1: # %if.else
 ; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    andi a1, a1, 2
-; CHECK-NEXT:    beqz a1, .LBB5_4
-; CHECK-NEXT:  .LBB5_2: # %if.then4
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    lui a0, %hi(scratch)
-; CHECK-NEXT:    addi a0, a0, %lo(scratch)
-; CHECK-NEXT:    vse64.v v9, (a0)
-; CHECK-NEXT:    j .LBB5_5
-; CHECK-NEXT:  .LBB5_3: # %if.then
+; CHECK-NEXT:    j .LBB5_3
+; CHECK-NEXT:  .LBB5_2: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:  .LBB5_3: # %if.end
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    andi a1, a1, 2
-; CHECK-NEXT:    bnez a1, .LBB5_2
-; CHECK-NEXT:  .LBB5_4: # %if.else5
+; CHECK-NEXT:    bnez a1, .LBB5_5
+; CHECK-NEXT:  # %bb.4: # %if.else5
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI5_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_2)
@@ -272,7 +264,20 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    lui a0, %hi(scratch)
 ; CHECK-NEXT:    addi a0, a0, %lo(scratch)
 ; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:  .LBB5_5: # %if.end10
+; CHECK-NEXT:    j .LBB5_6
+; CHECK-NEXT:  .LBB5_5: # %if.then4
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
+; CHECK-NEXT:    vlse64.v v9, (a0), zero
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
+; CHECK-NEXT:    vlse64.v v10, (a0), zero
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    lui a0, %hi(scratch)
+; CHECK-NEXT:    addi a0, a0, %lo(scratch)
+; CHECK-NEXT:    vse64.v v9, (a0)
+; CHECK-NEXT:  .LBB5_6: # %if.end10
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vv v8, v8, v8
 ; CHECK-NEXT:    ret
@@ -337,6 +342,7 @@ define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    beqz a1, .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, mu
 ; CHECK-NEXT:    j .LBB6_3
 ; CHECK-NEXT:  .LBB6_2: # %if.else
 ; CHECK-NEXT:    csrr a0, vlenb
@@ -412,6 +418,7 @@ define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    j .LBB7_3
 ; CHECK-NEXT:  .LBB7_2: # %if.else
 ; CHECK-NEXT:    vfsub.vv v9, v8, v9
+; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, mu
 ; CHECK-NEXT:  .LBB7_3: # %if.end
 ; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vv v8, v9, v8

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index 48ae74c91bc29..9a44fe519d30b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -377,6 +377,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
   ; CHECK-NEXT:   [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6 /* e64 */, implicit $vtype
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVMV_X_S_M1_]]
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoRET implicit $x10
   bb.0.entry:
     successors: %bb.2(0x30000000), %bb.1(0x50000000)
@@ -436,7 +437,7 @@ body:             |
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr = COPY $v9
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vr = COPY $v8
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr = COPY $x10
-  ; CHECK-NEXT:   $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+  ; CHECK-NEXT:   [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr = COPY $x0
   ; CHECK-NEXT:   BEQ [[COPY3]], [[COPY4]], %bb.2
   ; CHECK-NEXT:   PseudoBR %bb.1
@@ -445,12 +446,14 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLI [[PseudoVSETVLI]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.if.else:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLI [[PseudoVSETVLI]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.if.end:
   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
@@ -604,10 +607,8 @@ body:             |
   ; CHECK-NEXT:   [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
   ; CHECK-NEXT:   [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 87 /* e32, mf2, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   BLTU [[ADDI]], [[COPY1]], %bb.1
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -674,10 +675,8 @@ body:             |
   ; CHECK-NEXT:   [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
   ; CHECK-NEXT:   [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 87 /* e32, mf2, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -825,9 +824,6 @@ body:             |
 
 ...
 ---
-# FIXME: This test shows incorrect VSETVLI insertion. The VLUXEI64 needs
-# configuration for SEW=8 but it instead inherits a SEW=64 from the entry
-# block.
 name:            vsetvli_vluxei64_regression
 tracksRegLiveness: true
 body:             |
@@ -853,6 +849,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   %mask:vr = PseudoVMANDN_MM_MF8 %t6, %t3, -1, 0 /* e8 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   %t2:gpr = COPY $x0
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   BEQ %a, %t2, %bb.3
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}


        


More information about the llvm-commits mailing list