[llvm] 3e65ad7 - [AArch64] Combine Trunc(DUP) -> DUP
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 21 07:00:05 PST 2022
Author: David Green
Date: 2022-12-21T14:59:59Z
New Revision: 3e65ad7482e9e612abcc115f8fb2ed379fcad612
URL: https://github.com/llvm/llvm-project/commit/3e65ad7482e9e612abcc115f8fb2ed379fcad612
DIFF: https://github.com/llvm/llvm-project/commit/3e65ad7482e9e612abcc115f8fb2ed379fcad612.diff
LOG: [AArch64] Combine Trunc(DUP) -> DUP
This adds a simple fold of TRUNCATE(AArch64ISD::DUP) -> AArch64ISD::DUP,
truncating the DUP's scalar operand where needed. This can help generate
better UMULL sequences and seems useful in general.
Differential Revision: https://reviews.llvm.org/D140289
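
As a rough illustration of the fold (a hypothetical reduced example, not
one of the patch's test cases; the function name is made up and the exact
assembly depends on surrounding combines), a truncate of a scalar splat
like the IR below is now expected to lower to a single 32-bit dup rather
than a 64-bit dup followed by an xtn:

    define <2 x i32> @trunc_of_splat(i64 %s) {
      ; Splat the 64-bit scalar into both lanes, then truncate each lane to i32.
      ; With this combine, the truncate is expected to fold into the DUP:
      ; the scalar is truncated and a v2i32 DUP is emitted directly.
      %ins = insertelement <2 x i64> poison, i64 %s, i64 0
      %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
      %trunc = trunc <2 x i64> %splat to <2 x i32>
      ret <2 x i32> %trunc
    }
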
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
llvm/test/CodeGen/AArch64/aarch64-smull.ll
llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index faee3f8c2cb8..058a124fed38 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -930,6 +930,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MSTORE);
@@ -17392,6 +17393,22 @@ static SDValue performBuildVectorCombine(SDNode *N,
return SDValue();
}
+static SDValue performTruncateCombine(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ if (VT.isFixedLengthVector() && VT.is64BitVector() && N0.hasOneUse() &&
+ N0.getOpcode() == AArch64ISD::DUP) {
+ SDValue Op = N0.getOperand(0);
+ if (VT.getScalarType() == MVT::i32 &&
+ N0.getOperand(0).getValueType().getScalarType() == MVT::i64)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i32, Op);
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Op);
+ }
+
+ return SDValue();
+}
+
// Check an node is an extend or shift operand
static bool isExtendOrShiftOperand(SDValue N) {
unsigned Opcode = N.getOpcode();
@@ -21192,6 +21209,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performAddSubCombine(N, DCI, DAG);
case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);
+ case ISD::TRUNCATE:
+ return performTruncateCombine(N, DAG);
case AArch64ISD::ANDS:
return performFlagSettingCombine(N, DCI, ISD::AND);
case AArch64ISD::ADC:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 197c08dc25fb..f02d3f1eb750 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -116,12 +116,10 @@ entry:
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-LABEL: dupzext_v2i16_v2i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: and x8, x0, #0xffff
+; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: xtn v2.2s, v2.2d
; CHECK-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-NEXT: ret
entry:
@@ -223,9 +221,8 @@ define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: movi v1.8b, #1
-; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: dup v2.8b, w8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: xtn v2.8b, v2.8h
; CHECK-NEXT: umull v0.8h, v2.8b, v0.8b
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 7dd2ae130345..da0e428ec33d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -1038,11 +1038,9 @@ define <8 x i32> @umull_and_v8i32_dup(<8 x i16> %src1, i32 %src2) {
; CHECK-LABEL: umull_and_v8i32_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: dup v2.4s, w8
-; CHECK-NEXT: xtn v2.4h, v2.4s
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v1.4s, v0.8h, v2.8h
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
-; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i16> %src1 to <8 x i32>
@@ -1090,12 +1088,10 @@ entry:
define <4 x i64> @umull_and_v4i64_dup(<4 x i32> %src1, i64 %src2) {
; CHECK-LABEL: umull_and_v4i64_dup:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and x8, x0, #0xff
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: dup v2.2d, x8
-; CHECK-NEXT: xtn v2.2s, v2.2d
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: dup v2.4s, w8
+; CHECK-NEXT: umull2 v1.2d, v0.4s, v2.4s
; CHECK-NEXT: umull v0.2d, v0.2s, v2.2s
-; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
%in1 = zext <4 x i32> %src1 to <4 x i64>
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
index 4fccf4d07405..f1bca142bd18 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
@@ -11,8 +11,7 @@ define void @no_combine(i32 %p) local_unnamed_addr {
; CHECK-LABEL: no_combine:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4h, #4
-; CHECK-NEXT: dup v1.4s, w0
-; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: dup v1.4h, w0
; CHECK-NEXT: mov v1.d[1], v0.d[0]
; CHECK-NEXT: uzp1 v0.16b, v1.16b, v1.16b
; CHECK-NEXT: str q0, [x8]