# [llvm-branch-commits] [llvm] 22dcfb4 - [RISCVInsertVSETVLI] Remove an unsound optimization

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Aug 11 00:09:47 PDT 2022

```Author: Philip Reames
Date: 2022-08-11T09:08:33+02:00
New Revision: 22dcfb4f16bd39962519c7c3415b2806e9f25ba1

URL: https://github.com/llvm/llvm-project/commit/22dcfb4f16bd39962519c7c3415b2806e9f25ba1
DIFF: https://github.com/llvm/llvm-project/commit/22dcfb4f16bd39962519c7c3415b2806e9f25ba1.diff

LOG: [RISCVInsertVSETVLI] Remove an unsound optimization

This fixes a bug reported privately by @craig.topper. Here's an example which illustrates the problem:

vsetivli a1, a0, e32, m1, ta, mu # both DefInfo and PrevInfo
vsetivli a2, a1, e32, m4, ta, mu

With the unsound result being:

vsetivli a1, a0, e32, m1, ta, mu
vsetivli a2, a0, e32, m4, ta, mu

Consider the case where this is running on a machine with VLEN=512,. For this case, the VLMAXs are 16 and 64 respectively.

Consider for a0 = 33. The correct result is: a1 = 16, and a2 = 16

After the unsound optimization: a1 = 16 and a2 = 33

This particular example used VLMAXs which differed by more than a power of two. With a difference of only one power of two, there's another form of this bug which involves the AVL < 2 x VLMAX special case, but that ones more complicated to construct as many examples turn out accidentally sound.

This patch takes the approach of simply removing the unsound optimization, but there are multiple sound sub-cases of it. I plan to return to at least a couple of them, but figured it was cleaner to remove the unsound optimization (for ease of backporting), and then review the new optimizations on their own.

Differential Revision: https://reviews.llvm.org/D131264

Modified:
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

Removed:

################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index fc0a983f6542..5d9bd2f67558 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1022,16 +1022,10 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
return;
}

-  // Two cases involving an AVL resulting from a previous vsetvli.
-  // 1) If the AVL is the result of a previous vsetvli which has the
-  //    same AVL and VLMAX as our current state, we can reuse the AVL
-  //    from the current state for the new one.  This allows us to
-  //    generate 'vsetvli x0, x0, vtype" or possible skip the transition
-  //    entirely.
-  // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
-  //    replace the AVL operand with the AVL of the defining vsetvli.
-  //    We avoid general register AVLs to avoid extending live ranges
-  //    without being sure we can kill the original source reg entirely.
+  // If AVL is defined by a vsetvli with the same VLMAX, we can
+  // replace the AVL operand with the AVL of the defining vsetvli.
+  // We avoid general register AVLs to avoid extending live ranges
+  // without being sure we can kill the original source reg entirely.
if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
return;
MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
@@ -1039,17 +1033,6 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
return;

VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-  // case 1
-  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
-      DefInfo.hasSameAVL(PrevInfo) &&
-      DefInfo.hasSameVLMAX(PrevInfo)) {
-    if (PrevInfo.hasAVLImm())
-      Info.setAVLImm(PrevInfo.getAVLImm());
-    else
-      Info.setAVLReg(PrevInfo.getAVLReg());
-    return;
-  }
-  // case 2
if (DefInfo.hasSameVLMAX(Info) &&
(DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
if (DefInfo.hasAVLImm())

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 2184925214c2..65d1bf6a026f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -837,7 +837,7 @@ define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a0, 1
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB18_2
; CHECK-NEXT:  # %bb.1: # %if

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index acb101388873..1ddd981ff417 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -18,6 +18,7 @@ declare <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
define <vscale x 1 x double> @test1(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32, mf2, ta, mu
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
@@ -412,7 +413,7 @@ define i64 @avl_forward1b_neg(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p) nou
; CHECK-LABEL: avl_forward1b_neg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli a1, 6, e16, m1, ta, mu
-; CHECK-NEXT:    vsetivli zero, 6, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
@@ -467,6 +468,7 @@ entry:
define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4:
; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e16, m1, ta, mu
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
@@ -480,10 +482,10 @@ entry:
define i64 @avl_forward4b(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4b:
; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a1, a1, e16, m1, ta, mu
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
entry:
%vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 1, i64 0)
@@ -496,6 +498,7 @@ entry:
define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff:
; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, mu
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT:    vle64ff.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
@@ -516,6 +519,7 @@ entry:
define <vscale x 1 x i64> @vleNff2(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff2:
; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, mu
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT:    vle64ff.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a2
@@ -541,6 +545,7 @@ define <vscale x 2 x i32> @avl_forward5(<vscale x 2 x i32>* %addr) {
; CHECK-LABEL: avl_forward5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, mu
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret

```