[PATCH] D60890: [AArch64] splat before (f)mul to allow mul-by-element isel
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 15:07:05 PDT 2019
spatel created this revision.
spatel added reviewers: efriedma, sdesmalen, dmgreen.
Herald added subscribers: hiraditya, kristof.beyls, javed.absar, mcrosier.
Herald added a project: LLVM.
A splat of a vector multiply (either integer or FP) can be turned into a multiply-by-element:
splat (mul X, Y), Lane --> mul (splat X, Lane), (splat Y, Lane) --> mul-by-element (splat X, Lane), Y.[Lane]
These patterns showed up as an ARM regression in D60214 <https://reviews.llvm.org/D60214>, but we already have this transform in IR, so, if I understand correctly, this is an existing problem rather than one introduced by that patch.
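The constant cases look better (the separate dup is folded into the multiply-by-element), but I'm not sure this is a win when both operands are variables: in those tests we still need a dup plus a multiply, so the instruction count is unchanged.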
https://reviews.llvm.org/D60890
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/mul_by_elt.ll
Index: llvm/test/CodeGen/AArch64/mul_by_elt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/mul_by_elt.ll
+++ llvm/test/CodeGen/AArch64/mul_by_elt.ll
@@ -19,8 +19,7 @@
; CHECK-LABEL: splat0_after_fmul_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.4s, #3.00000000
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0]
; CHECK-NEXT: ret
%mul = fmul <4 x float> %a, <float 3.0, float 42.0, float 3.0, float 3.0>
%splat = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> zeroinitializer
@@ -44,8 +43,7 @@
; CHECK-LABEL: splat1_after_fmul_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2d, #5.00000000
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: dup v0.2d, v0.d[1]
+; CHECK-NEXT: fmul v0.2d, v1.2d, v0.d[1]
; CHECK-NEXT: ret
%mul = fmul <2 x double> %a, <double -1.0, double 5.0>
%splat = shufflevector <2 x double> %mul, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -57,8 +55,8 @@
define <2 x double> @splat1_before_fmul(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: splat1_before_fmul:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v0.2d, v0.d[1]
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1]
; CHECK-NEXT: ret
%splata = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 1>
%splatb = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -69,8 +67,8 @@
define <2 x double> @splat1_after_fmul(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: splat1_after_fmul:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v0.2d, v0.d[1]
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1]
; CHECK-NEXT: ret
%mul = fmul <2 x double> %a, %b
%splat = shufflevector <2 x double> %mul, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -94,8 +92,7 @@
; CHECK-LABEL: splat2_after_mul_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #3
-; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v0.4s, v0.s[2]
+; CHECK-NEXT: mul v0.4s, v1.4s, v0.s[2]
; CHECK-NEXT: ret
%mul = mul <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
%splat = shufflevector <4 x i32> %mul, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
@@ -105,8 +102,8 @@
define <8 x i16> @splat1_before_mul(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: splat1_before_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: dup v0.8h, v0.h[1]
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[1]
; CHECK-NEXT: ret
%splata = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%splatb = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -117,11 +114,10 @@
define <8 x i16> @splat1_after_mul(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: splat1_after_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: dup v0.8h, v0.h[1]
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[1]
; CHECK-NEXT: ret
%mul = mul <8 x i16> %a, %b
%splat = shufflevector <8 x i16> %mul, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i16> %splat
}
-
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6597,6 +6597,17 @@
!isa<ConstantSDNode>(V1.getOperand(Lane)))
return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
+ // Splat multiply operands to allow selecting this as an (f)mul by element:
+ // splat (mul X, Y), Lane --> mul (splat X, Lane), (splat Y, Lane)
+ if (V1.hasOneUse() &&
+ (V1.getOpcode() == ISD::FMUL || V1.getOpcode() == ISD::MUL)) {
+ SDValue SplatX = DAG.getVectorShuffle(VT, dl, V1.getOperand(0),
+ DAG.getUNDEF(VT), ShuffleMask);
+ SDValue SplatY = DAG.getVectorShuffle(VT, dl, V1.getOperand(1),
+ DAG.getUNDEF(VT), ShuffleMask);
+ return DAG.getNode(V1.getOpcode(), dl, VT, SplatX, SplatY);
+ }
+
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60890.195817.patch
Type: text/x-patch
Size: 4576 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190418/a3e9b2c3/attachment-0001.bin>
More information about the llvm-commits
mailing list