[llvm] [RISCV] Lower (vector_interleave X, undef) to (vzext_vl X). (PR #87283)

Tue Apr 2 08:40:12 PDT 2024

https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/87283

>From 982160fa71b498a97f01a65519b7d6067ecf9ab9 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 1 Apr 2024 14:07:42 -0700
Subject: [PATCH 1/3] [RISCV] Add test for miscompile of vector.interleave when
 odd vector is literal poison.

The interleave lowering relies on a math trick that requires passing
the odd vector to two math instructions. In order to be correct
these instructions must see the same value.

If the odd vector is provably poison or undef, SelectionDAG will
create a vwadd and vwmaccu where the operand is a copy from IMPLICIT_DEF.
Later this will become just the undef flag on the operand. This
gives the register allocator freedom to pick a different register
for each instruction.
---
 .../CodeGen/RISCV/rvv/vector-interleave.ll    | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 1acc0fec8fe586..47cc4f03062e83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -656,6 +656,31 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
   ret <vscale x 16 x double> %res
 }
 
+; FIXME: The last operand to the vwaddu.vv and vwmaccu.vx are both undef. They
+; need to be the same register with the same contents. Otherwise, the even
+; elements will not contain just the values from %a.
+define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vwaddu.vv v12, v8, v10
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v12, a0, v8
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+;
+; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    li a0, 32
+; ZVBB-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; ZVBB-NEXT:    vwsll.vx v12, v10, a0
+; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
+; ZVBB-NEXT:    vmv4r.v v8, v12
+; ZVBB-NEXT:    ret
+  %res = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> poison)
+  ret <vscale x 8 x i32> %res
+}
+
 declare <vscale x 64 x half> @llvm.experimental.vector.interleave2.nxv64f16(<vscale x 32 x half>, <vscale x 32 x half>)
 declare <vscale x 32 x float> @llvm.experimental.vector.interleave2.nxv32f32(<vscale x 16 x float>, <vscale x 16 x float>)
 declare <vscale x 16 x double> @llvm.experimental.vector.interleave2.nxv16f64(<vscale x 8 x double>, <vscale x 8 x double>)

>From 97bf6a0ec0abe05d6411f3288b990f9e07f31130 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 1 Apr 2024 14:14:49 -0700
Subject: [PATCH 2/3] [RISCV] Lower (vector_interleave X, undef) to (vzext_vl
 X).

If the odd vector is undef or poison, the widening add and multiply trick
doesn't work unless we freeze the odd vector.

Unfortunately, freezing doesn't work when the operand is provably
undef/poison. MIR doesn't have a representation for freeze so it
just becomes a COPY from IMPLICIT_DEF which freely propagates undef
to each operand independently.

To work around this, check for undef explicitly and lower to a VZEXT_VL
of the even vector. This produces better code than we'd get from a
freeze anyway.

I've left a FIXME for adding a freeze. I'll do that as a separate patch
as it affects other tests and doesn't help with the new test.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      | 11 ++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll | 16 ++++++----------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f693cbd3bea51e..c37938cd9559f0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4624,7 +4624,13 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
   SDValue Passthru = DAG.getUNDEF(WideContainerVT);
 
   SDValue Interleaved;
-  if (Subtarget.hasStdExtZvbb()) {
+  if (OddV.isUndef()) {
+    // If OddV is undef, this is a zero extend.
+    // FIXME: Not only does this optimize the code, it fixes some correctness
+    // issues because MIR does not have freeze.
+    Interleaved = DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV,
+                              Mask, VL);
+  } else if (Subtarget.hasStdExtZvbb()) {
     // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
     SDValue OffsetVec =
         DAG.getSplatVector(VecContainerVT, DL,
@@ -4635,6 +4641,9 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
     Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
                               Interleaved, EvenV, Passthru, Mask, VL);
   } else {
+    // FIXME: We should freeze the odd vector here. We already handled the case
+    // of provably undef/poison above.
+
     // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
     // vwaddu.vv
     Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 47cc4f03062e83..7068e044dfc4f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -662,20 +662,16 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
 define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v12, v8, v10
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vwmaccu.vx v12, a0, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
 ; ZVBB:       # %bb.0:
-; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; ZVBB-NEXT:    vwsll.vx v12, v10, a0
-; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
-; ZVBB-NEXT:    vmv4r.v v8, v12
+; ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; ZVBB-NEXT:    vzext.vf2 v12, v8
+; ZVBB-NEXT:    vmv.v.v v8, v12
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> poison)
   ret <vscale x 8 x i32> %res

>From 68b68029855165663593b78932decf4beaefb16e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 1 Apr 2024 14:39:27 -0700
Subject: [PATCH 3/3] fixup! clang-format and remove FIXME from test.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      | 4 ++--
 llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c37938cd9559f0..ee83f9da4934b8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4628,8 +4628,8 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
     // If OddV is undef, this is a zero extend.
     // FIXME: Not only does this optimize the code, it fixes some correctness
     // issues because MIR does not have freeze.
-    Interleaved = DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV,
-                              Mask, VL);
+    Interleaved =
+        DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
   } else if (Subtarget.hasStdExtZvbb()) {
     // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
     SDValue OffsetVec =
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 7068e044dfc4f4..0992c9fe495f43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -656,9 +656,6 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
   ret <vscale x 16 x double> %res
 }
 
-; FIXME: The last operand to the vwaddu.vv and vwmaccu.vx are both undef. They
-; need to be the same register with the same contents. Otherwise, the even
-; elements will not contain just the values from %a.
 define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
 ; CHECK:       # %bb.0: