[PATCH] D133739: [RISCV][WIP] Form more VW instructions
Quentin Colombet via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 12 17:27:15 PDT 2022
qcolombet created this revision.
qcolombet added reviewers: craig.topper, dcaballe.
Herald added subscribers: sunshaoce, VincentWu, luke957, StephenFan, vkmr, frasercrmck, evandro, luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, niosHD, sabuasal, simoncook, johnrusso, rbar, asb, hiraditya, arichardson.
Herald added a project: All.
qcolombet requested review of this revision.
Herald added subscribers: pcwang-thead, eopXD, MaskRay.
Herald added a project: LLVM.
This patch aims to start a conversation about how we should approach forming VW variants (operations that widen their input arguments) more aggressively.
Currently, we fold sign/zero extensions into instructions that support widening only when the result of the extension has a single use.
The current (WIP) patch lifts this limitation by checking whether all the users of the extension support the folding and by allowing the transformation when that's the case.
The patch is far from perfect: it doesn't actually check that the folding will happen for all the instructions (and, in true SDISel fashion, it will be defeated by basic block boundaries), but it demonstrates what could be achieved, codegen-wise, with the added test:
--- old_codegen.s 2022-09-13 00:12:48.989575265 +0000
+++ new_codegen.s 2022-09-13 00:13:02.134793836 +0000
@@ -16,30 +16,28 @@
.Lfunc_end0:
.size vwmul_v2i16, .Lfunc_end0-vwmul_v2i16
.cfi_endproc
# -- End function
.globl vwmul_v2i16_multiple_users # -- Begin function vwmul_v2i16_multiple_users
.p2align 2
.type vwmul_v2i16_multiple_users,@function
vwmul_v2i16_multiple_users: # @vwmul_v2i16_multiple_users
.cfi_startproc
# %bb.0:
- vsetivli zero, 2, e16, mf4, ta, mu
+ vsetivli zero, 2, e8, mf8, ta, mu
vle8.v v8, (a0)
vle8.v v9, (a1)
vle8.v v10, (a2)
- vsext.vf2 v11, v8
- vsext.vf2 v8, v9
- vsext.vf2 v9, v10
- vmul.vv v8, v11, v8
- vmul.vv v9, v11, v9
- vor.vv v8, v8, v9
+ vwmul.vv v11, v8, v9
+ vwmul.vv v9, v8, v10
+ vsetvli zero, zero, e16, mf4, ta, mu
+ vor.vv v8, v11, v9
@craig.topper How do you think we should approach forming VW instructions?
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D133739
Files:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -18,6 +18,30 @@
ret <2 x i16> %e
}
+define <2 x i16> @vwmul_v2i16_multiple_users(<2 x i8>* %x, <2 x i8>* %y, <2 x i8> *%z) {
+; CHECK-LABEL: vwmul_v2i16_multiple_users:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vle8.v v10, (a2)
+; CHECK-NEXT: vwmul.vv v11, v8, v9
+; CHECK-NEXT: vwmul.vv v9, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vor.vv v8, v11, v9
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %b2 = load <2 x i8>, <2 x i8>* %z
+ %c = sext <2 x i8> %a to <2 x i16>
+ %d = sext <2 x i8> %b to <2 x i16>
+ %d2 = sext <2 x i8> %b2 to <2 x i16>
+ %e = mul <2 x i16> %c, %d
+ %f = mul <2 x i16> %c, %d2
+ %g = or <2 x i16> %e, %f
+ ret <2 x i16> %g
+}
+
define <4 x i16> @vwmul_v4i16(<4 x i8>* %x, <4 x i8>* %y) {
; CHECK-LABEL: vwmul_v4i16:
; CHECK: # %bb.0:
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8570,6 +8570,25 @@
return SDValue();
}
+/// Check whether \p N supports doing sign/zero extension of its arguments.
+bool canFoldExtensionInOpcode(const SDNode *N) {
+ switch (N->getOpcode()) {
+ case RISCVISD::ADD_VL:
+ case RISCVISD::SUB_VL:
+ case RISCVISD::MUL_VL:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Check if all the users of \p Val support sign/zero extending their
+/// arguments.
+bool canFoldExtensionInAllUsers(SDValue Val) {
+ return std::all_of(Val->use_begin(), Val->use_end(),
+ canFoldExtensionInOpcode);
+}
+
// Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
// vwadd(u).vv/vx or vwsub(u).vv/vx.
static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
@@ -8599,7 +8618,8 @@
// If the RHS is a sext or zext, we can form a widening op.
if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
- Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
+ canFoldExtensionInAllUsers(Op1) && Op1.getOperand(1) == Mask &&
+ Op1.getOperand(2) == VL) {
unsigned ExtOpc = Op1.getOpcode();
Op1 = Op1.getOperand(0);
// Re-introduce narrower extends if needed.
@@ -8709,7 +8729,7 @@
bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
- if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
+ if ((!IsSignExt && !IsZeroExt) || !canFoldExtensionInAllUsers(Op0))
return SDValue();
SDValue Merge = N->getOperand(2);
@@ -8731,7 +8751,7 @@
// See if the other operand is the same opcode.
if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
- if (!Op1.hasOneUse())
+ if (!canFoldExtensionInAllUsers(Op1))
return SDValue();
// Make sure the mask and VL match.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D133739.459595.patch
Type: text/x-patch
Size: 3397 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220913/40d344f4/attachment.bin>
More information about the llvm-commits
mailing list