[llvm] [RISCV] Fix vmerge.vvm/vmv.v.v getting folded into ops with mismatching EEW (PR #101152)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 08:39:00 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/101152
>From 27e82f1ab13c0a1761ea0c1c55f33c4014def119 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 30 Jul 2024 15:33:20 +0800
Subject: [PATCH 1/4] Precommit tests
---
.../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 36 +++++++++++++++++++
.../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll | 12 +++++++
2 files changed, 48 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index a08bcae074b9b..ddf83d87cea6c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1196,3 +1196,39 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32>
)
ret <vscale x 2 x i32> %b
}
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
+ %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+ <vscale x 2 x i32> %passthru,
+ <vscale x 2 x i32> %passthru,
+ <vscale x 2 x i32> %a.bitcast,
+ <vscale x 2 x i1> %mask,
+ i64 %avl
+ )
+ ret <vscale x 2 x i32> %b
+}
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
+; CHECK-LABEL: unfoldable_mismatched_sew_avl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e64, m1, tu, ma
+; CHECK-NEXT: vadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
+ %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+ <vscale x 2 x i32> %passthru,
+ <vscale x 2 x i32> %passthru,
+ <vscale x 2 x i32> %a.bitcast,
+ <vscale x 2 x i1> splat (i1 true),
+ i64 3
+ )
+ ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 8a589a31a1535..65ee91a0e1907 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -180,3 +180,15 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vsc
%b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
ret <vscale x 2 x i32> %b
}
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
+; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e64, m1, tu, ma
+; CHECK-NEXT: vadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
+ %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen 3)
+ ret <vscale x 2 x i32> %b
+}
>From fcbbfebef6affea547cc4201c9f4722148a03719 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 30 Jul 2024 16:35:28 +0800
Subject: [PATCH 2/4] [RISCV] Fix vmerge.vvm/vmv.v.v getting folded into ops
with mismatching EEW
As noted in https://github.com/llvm/llvm-project/pull/100367/files#r1695448771, we currently fold vmerge.vvms and vmv.v.vs into their ops even if the EEW is different. This is incorrect if we end up changing the mask or AVL of the op.
For now this gets the op's EEW via its simple value type, since there doesn't seem to be any existing information about the EEW of instructions. We'll probably need to encode this at some point if we want to be able to access it at the MachineInstr level in #100367.
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 10 +++++++++-
.../CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll | 12 ++++++++----
llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll | 6 ++++--
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4418905ce21ed..fc0238e4892d6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3855,11 +3855,19 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// If we end up changing the VL or mask of True, then we need to make sure it
// doesn't raise any observable fp exceptions, since changing the active
// elements will affect how fflags is set.
- if (TrueVL != VL || !IsMasked)
+ if (TrueVL != VL || !IsMasked) {
if (mayRaiseFPException(True.getNode()) &&
!True->getFlags().hasNoFPExcept())
return false;
+ // If the EEW of True is different from vmerge's SEW, then we cannot change
+ // the VL or mask.
+ if (Log2_64(True.getSimpleValueType().getScalarSizeInBits()) !=
+ N->getConstantOperandVal(
+ RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
+ return false;
+ }
+
SDLoc DL(N);
// From the preconditions we checked above, we know the mask and thus glue
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index ddf83d87cea6c..c0fb675cb991f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1200,8 +1200,10 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32>
define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
; CHECK-LABEL: unfoldable_mismatched_sew_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
@@ -1218,8 +1220,10 @@ define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %pa
define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
; CHECK-LABEL: unfoldable_mismatched_sew_avl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 3, e64, m1, tu, ma
-; CHECK-NEXT: vadd.vv v8, v9, v10
+; CHECK-NEXT: vsetivli zero, 5, e64, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 65ee91a0e1907..4940e652170ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -184,8 +184,10 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vsc
define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 3, e64, m1, tu, ma
-; CHECK-NEXT: vadd.vv v8, v9, v10
+; CHECK-NEXT: vsetivli zero, 6, e64, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
>From 019073717c5511af11c002d7c1b24f07cd2f952c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 30 Jul 2024 17:48:48 +0800
Subject: [PATCH 3/4] Use getScalarValueSizeInBits
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index fc0238e4892d6..d5f645e8d5158 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3862,7 +3862,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// If the EEW of True is different from vmerge's SEW, then we cannot change
// the VL or mask.
- if (Log2_64(True.getSimpleValueType().getScalarSizeInBits()) !=
+ if (Log2_64(True.getScalarValueSizeInBits()) !=
N->getConstantOperandVal(
RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
return false;
>From 70395f346e21697a8d3ba3239ac47c43e6013558 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 30 Jul 2024 23:37:43 +0800
Subject: [PATCH 4/4] Just check MVTs, bail in every case, remove redundant
tests
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 14 ++++-----
.../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 29 ++++---------------
.../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll | 12 ++++----
3 files changed, 16 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d5f645e8d5158..4de38db6e1fe9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3733,6 +3733,10 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
assert(!Glue || Glue.getValueType() == MVT::Glue);
+ // If the EEW of True is different from vmerge's SEW, then we can't fold.
+ if (True.getSimpleValueType() != N->getSimpleValueType(0))
+ return false;
+
// We require that either passthru and false are the same, or that passthru
// is undefined.
if (Passthru != False && !isImplicitDef(Passthru))
@@ -3855,19 +3859,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// If we end up changing the VL or mask of True, then we need to make sure it
// doesn't raise any observable fp exceptions, since changing the active
// elements will affect how fflags is set.
- if (TrueVL != VL || !IsMasked) {
+ if (TrueVL != VL || !IsMasked)
if (mayRaiseFPException(True.getNode()) &&
!True->getFlags().hasNoFPExcept())
return false;
- // If the EEW of True is different from vmerge's SEW, then we cannot change
- // the VL or mask.
- if (Log2_64(True.getScalarValueSizeInBits()) !=
- N->getConstantOperandVal(
- RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
- return false;
- }
-
SDLoc DL(N);
// From the preconditions we checked above, we know the mask and thus glue
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index c0fb675cb991f..259515f160048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1197,42 +1197,23 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32>
ret <vscale x 2 x i32> %b
}
-define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
-; CHECK-LABEL: unfoldable_mismatched_sew_mask:
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT: ret
- %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
- %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
- %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
- <vscale x 2 x i32> %passthru,
- <vscale x 2 x i32> %passthru,
- <vscale x 2 x i32> %a.bitcast,
- <vscale x 2 x i1> %mask,
- i64 %avl
- )
- ret <vscale x 2 x i32> %b
-}
-
-define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
-; CHECK-LABEL: unfoldable_mismatched_sew_avl:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 5, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v9, v9, v10
-; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
+ %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %a.bitcast,
<vscale x 2 x i1> splat (i1 true),
- i64 3
+ i64 %avl
)
ret <vscale x 2 x i32> %b
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 4940e652170ab..3952e48c5c28f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -181,16 +181,16 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vsc
ret <vscale x 2 x i32> %b
}
-define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
-; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
+define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v9, v10
-; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
+ %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen %avl)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
- %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen 3)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen %avl)
ret <vscale x 2 x i32> %b
}
More information about the llvm-commits
mailing list