[llvm] [DAG] Fix a miscompile in insert_subvector undef (insert_subvector undef, ..), idx combine (PR #73587)

Mon Nov 27 16:41:44 PST 2023

https://github.com/preames updated https://github.com/llvm/llvm-project/pull/73587

>From 5a34e47c987778b3dce2b4cf67a1626d0f0d1323 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Mon, 27 Nov 2023 15:01:40 -0800
Subject: [PATCH 1/3] Add test case which demonstrates issue

---
 .../CodeGen/RISCV/rvv/insert-subvector.ll     | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index 57de8341cb89cef..0ba7322215e3753 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -505,6 +505,35 @@ define <vscale x 2 x i64> @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 {
   ret <vscale x 2 x i64> %vec
 }
 
+; FIXME: This shows a case where we are miscompiling because the index of the
+; outer expects a scalable inner and the inner most subvector is fixed length.
+; The code generated happens to be correct if VLEN=128, but is wrong if
+; VLEN=256.
+define <vscale x 8 x i32> @insert_insert_combine(<2 x i32> %subvec) {
+; CHECK-LABEL: insert_insert_combine:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m4, ta, ma
+; CHECK-NEXT:    vslideup.vi v12, v8, 4
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %inner = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> undef, <2 x i32> %subvec, i64 0)
+  %outer = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 4 x i32> %inner, i64 4)
+  ret <vscale x 8 x i32> %outer
+}
+
+; We can combine these two (even with non-zero index on the outer) because
+; the vector must be an even multiple.
+define <vscale x 8 x i32> @insert_insert_combine2(<vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_insert_combine2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    ret
+  %inner = call <vscale x 4 x i32> @llvm.vector.insert.nxv2i32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 2 x i32> %subvec, i64 0)
+  %outer = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 4 x i32> %inner, i64 4)
+  ret <vscale x 8 x i32> %outer
+}
+
+
 attributes #0 = { vscale_range(2,1024) }
 
 declare <vscale x 4 x i1> @llvm.vector.insert.nxv1i1.nxv4i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64)
@@ -517,6 +546,9 @@ declare <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x
 
 declare <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8>, <vscale x 1 x i8>, i64 %idx)
 
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv2i32.nxv4i32(<vscale x 4 x i32>, <vscale x 2 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32>, <2 x i32>, i64)
+
 declare <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32>, <vscale x 2 x i32>, i64 %idx)
 declare <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32>, <vscale x 4 x i32>, i64 %idx)
 

>From 256140f3bd126d48ad7cc43fa038b162158e55a5 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Mon, 27 Nov 2023 15:11:07 -0800
Subject: [PATCH 2/3] [DAG] Fix a miscompile in insert_subvector undef
 (insert_subvector ..), idx combine

The combine was implicitly assuming that the index on the outer insert_subvector meant the same thing when the source was switched to be the index of the inner insert_subvector.  This is not true if the innermost sub-vector is fixed, and the outer subvector is scalable.

I could do a less restrictive fix here - i.e. allow the case where the scalability of the subvectors are the same - but there's no test coverage which shows this transform actually has profit.  Given that, go for the simplest fix.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp   | 7 ++++---
 llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll | 4 +---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 41d36e7d16d2e14..2a3425a42607e72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26153,10 +26153,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
                        N1, N2);
 
   // Eliminate an intermediate insert into an undef vector:
-  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
-  // insert_subvector undef, X, N2
+  // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
+  // insert_subvector undef, X, 0
   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
-      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
+      isNullConstant(N2))
     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                        N1.getOperand(1), N2);
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index 0ba7322215e3753..63c8861c3fcf5dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -512,9 +512,7 @@ define <vscale x 2 x i64> @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 {
 define <vscale x 8 x i32> @insert_insert_combine(<2 x i32> %subvec) {
 ; CHECK-LABEL: insert_insert_combine:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 6, e32, m4, ta, ma
-; CHECK-NEXT:    vslideup.vi v12, v8, 4
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    ret
   %inner = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> undef, <2 x i32> %subvec, i64 0)
   %outer = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 4 x i32> %inner, i64 4)

>From 3eb7f0ac923f350473573bca953df78e24354a50 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Mon, 27 Nov 2023 16:41:29 -0800
Subject: [PATCH 3/3] Drop fixme

---
 llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index 63c8861c3fcf5dd..8a368e7161c3f2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -505,7 +505,7 @@ define <vscale x 2 x i64> @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 {
   ret <vscale x 2 x i64> %vec
 }
 
-; FIXME: This shows a case where we are miscompiling because the index of the
+; This shows a case where we were miscompiling because the index of the
 ; outer expects a scalable inner and the inner most subvector is fixed length.
 ; The code generated happens to be correct if VLEN=128, but is wrong if
 ; VLEN=256.