[llvm] [RISCV] More explicitly check that combineOp_VLToVWOp_VL removes the extends it is supposed to. (PR #166710)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 09:11:19 PST 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/166710
>From fe7988b685d0bdfed8b40f6e2bf443417e945ce5 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 5 Nov 2025 20:53:16 -0800
Subject: [PATCH 1/3] [RISCV] Add test cases for widening add/sub with
mismatched extends. NFC
These are test cases where we have an add and a sub with the same
operands. One operand is a sign extend and the other is a zero extend.
The sub can only form a vwsub.wv but because add is commutable, it could
form vwadd.wv or vwaddu.wv depending on which extend is removed. We want
to form vwadd.wv to match the sub so the vsext can be removed.
Depending on the order of the instructions and the operand order
of the add, we might form vwaddu.wv instead.
---
.../RISCV/rvv/vscale-vw-web-simplification.ll | 76 +++++++++++++++++++
1 file changed, 76 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index ad2ed47e67e64..b1f0eee3e9f52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -570,7 +570,83 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
ret <vscale x 2 x i32> %i
}
+define <vscale x 4 x i32> @mismatched_extend_sub_add(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
+; FOLDING-LABEL: mismatched_extend_sub_add:
+; FOLDING: # %bb.0:
+; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vzext.vf2 v10, v8
+; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; FOLDING-NEXT: vwsub.wv v12, v10, v9
+; FOLDING-NEXT: vwadd.wv v10, v10, v9
+; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vmul.vv v8, v12, v10
+; FOLDING-NEXT: ret
+ %a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
+ %b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
+ %c = sub <vscale x 4 x i32> %a, %b
+ %d = add <vscale x 4 x i32> %a, %b
+ %e = mul <vscale x 4 x i32> %c, %d
+ ret <vscale x 4 x i32> %e
+}
+
+; FIXME: this should remove the vsext
+define <vscale x 4 x i32> @mismatched_extend_sub_add_commuted(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
+; FOLDING-LABEL: mismatched_extend_sub_add_commuted:
+; FOLDING: # %bb.0:
+; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vzext.vf2 v10, v8
+; FOLDING-NEXT: vsext.vf2 v12, v9
+; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; FOLDING-NEXT: vwsub.wv v10, v10, v9
+; FOLDING-NEXT: vwaddu.wv v12, v12, v8
+; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vmul.vv v8, v10, v12
+; FOLDING-NEXT: ret
+ %a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
+ %b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
+ %c = sub <vscale x 4 x i32> %a, %b
+ %d = add <vscale x 4 x i32> %b, %a
+ %e = mul <vscale x 4 x i32> %c, %d
+ ret <vscale x 4 x i32> %e
+}
+define <vscale x 4 x i32> @mismatched_extend_add_sub(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
+; FOLDING-LABEL: mismatched_extend_add_sub:
+; FOLDING: # %bb.0:
+; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vzext.vf2 v10, v8
+; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; FOLDING-NEXT: vwadd.wv v12, v10, v9
+; FOLDING-NEXT: vwsub.wv v10, v10, v9
+; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vmul.vv v8, v12, v10
+; FOLDING-NEXT: ret
+ %a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
+ %b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
+ %c = add <vscale x 4 x i32> %a, %b
+ %d = sub <vscale x 4 x i32> %a, %b
+ %e = mul <vscale x 4 x i32> %c, %d
+ ret <vscale x 4 x i32> %e
+}
+
+define <vscale x 4 x i32> @mismatched_extend_add_sub_commuted(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
+; FOLDING-LABEL: mismatched_extend_add_sub_commuted:
+; FOLDING: # %bb.0:
+; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vzext.vf2 v10, v8
+; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; FOLDING-NEXT: vwadd.wv v12, v10, v9
+; FOLDING-NEXT: vwsub.wv v10, v10, v9
+; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; FOLDING-NEXT: vmul.vv v8, v12, v10
+; FOLDING-NEXT: ret
+ %a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
+ %b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
+ %c = add <vscale x 4 x i32> %a, %b
+ %d = sub <vscale x 4 x i32> %a, %b
+ %e = mul <vscale x 4 x i32> %c, %d
+ ret <vscale x 4 x i32> %e
+}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
>From b7bd5e28eae9bfed2b813720f6c8af863dcd1626 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 5 Nov 2025 21:51:04 -0800
Subject: [PATCH 2/3] [RISCV] More explicitly check that combineOp_VLToVWOp_VL
removes the extends it is supposed to.
If we visit multiple root nodes, make sure the strategy chosen
for other nodes includes the nodes we've already committed to remove.
This can occur if have add/sub nodes where one operand is a zext
and the other is a sext. We might see that the nodes share a common
extension but pick a strategy that doesn't share it.
Stacked on #166700
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +++++++++++++-
.../RISCV/rvv/vscale-vw-web-simplification.ll | 7 +++----
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c3f100e3197b1..3b5dfd5845f0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17876,6 +17876,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
SmallVector<SDNode *> Worklist;
SmallPtrSet<SDNode *, 8> Inserted;
+ SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
Worklist.push_back(N);
Inserted.insert(N);
SmallVector<CombineResult> CombinesToApply;
@@ -17886,8 +17887,10 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
- &Inserted](const NodeExtensionHelper &Op) {
+ &Inserted, &ExtensionsToRemove](const NodeExtensionHelper &Op) {
if (Op.needToPromoteOtherUsers()) {
+ // Remember that we're supposed to remove this extension.
+ ExtensionsToRemove.insert(Op.OrigOperand.getNode());
for (SDUse &Use : Op.OrigOperand->uses()) {
SDNode *TheUser = Use.getUser();
if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
@@ -17921,6 +17924,15 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
std::optional<CombineResult> Res =
FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
if (Res) {
+ // If this strategy wouldn't remove an extension we're supposed to
+ // remove, reject it.
+ if (!Res->LHSExt.has_value() &&
+ ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
+ continue;
+ if (!Res->RHSExt.has_value() &&
+ ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
+ continue;
+
Matched = true;
CombinesToApply.push_back(*Res);
// All the inputs that are extended need to be folded, otherwise
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index b1f0eee3e9f52..034186210513c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -595,12 +595,11 @@ define <vscale x 4 x i32> @mismatched_extend_sub_add_commuted(<vscale x 4 x i16>
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; FOLDING-NEXT: vzext.vf2 v10, v8
-; FOLDING-NEXT: vsext.vf2 v12, v9
; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; FOLDING-NEXT: vwsub.wv v10, v10, v9
-; FOLDING-NEXT: vwaddu.wv v12, v12, v8
+; FOLDING-NEXT: vwsub.wv v12, v10, v9
+; FOLDING-NEXT: vwadd.wv v10, v10, v9
; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; FOLDING-NEXT: vmul.vv v8, v10, v12
+; FOLDING-NEXT: vmul.vv v8, v12, v10
; FOLDING-NEXT: ret
%a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
%b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
>From d4fb6204bb91e099b3ca640401b84fbfed8e6878 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 6 Nov 2025 07:37:34 -0800
Subject: [PATCH 3/3] fixup! clang-format
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 37 +++++++++++----------
1 file changed, 19 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b5dfd5845f0b..092841a71f2fa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17886,24 +17886,25 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
- auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
- &Inserted, &ExtensionsToRemove](const NodeExtensionHelper &Op) {
- if (Op.needToPromoteOtherUsers()) {
- // Remember that we're supposed to remove this extension.
- ExtensionsToRemove.insert(Op.OrigOperand.getNode());
- for (SDUse &Use : Op.OrigOperand->uses()) {
- SDNode *TheUser = Use.getUser();
- if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
- return false;
- // We only support the first 2 operands of FMA.
- if (Use.getOperandNo() >= 2)
- return false;
- if (Inserted.insert(TheUser).second)
- Worklist.push_back(TheUser);
- }
- }
- return true;
- };
+ auto AppendUsersIfNeeded =
+ [&Worklist, &Subtarget, &Inserted,
+ &ExtensionsToRemove](const NodeExtensionHelper &Op) {
+ if (Op.needToPromoteOtherUsers()) {
+ // Remember that we're supposed to remove this extension.
+ ExtensionsToRemove.insert(Op.OrigOperand.getNode());
+ for (SDUse &Use : Op.OrigOperand->uses()) {
+ SDNode *TheUser = Use.getUser();
+ if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
+ return false;
+ // We only support the first 2 operands of FMA.
+ if (Use.getOperandNo() >= 2)
+ return false;
+ if (Inserted.insert(TheUser).second)
+ Worklist.push_back(TheUser);
+ }
+ }
+ return true;
+ };
// Control the compile time by limiting the number of node we look at in
// total.
More information about the llvm-commits
mailing list