[llvm] [LAA] Improve the output remark for LoopVectorize (PR #65832)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 8 22:02:00 PDT 2023
https://github.com/vfdff created https://github.com/llvm/llvm-project/pull/65832:
None
>From e5fec11383c530704c2d1ca745324a5441aa8293 Mon Sep 17 00:00:00 2001
From: Zhongyunde <zhongyunde at huawei.com>
Date: Sat, 9 Sep 2023 11:59:41 +0800
Subject: [PATCH 1/2] [GISel][AArch64] Legalize <2 x i16> for
G_INSERT_VECTOR_ELT
Widen the vector elements until the vector is at least 64 bits wide so that
it is legal, instead of clamping the number of elements. Depends on D153394.
Fixes https://github.com/llvm/llvm-project/issues/63826
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 14 +++++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 +-
.../GlobalISel/legalize-insert-vector-elt.mir | 31 ++++++++++---------
.../GlobalISel/select-bitfield-insert.ll | 12 +++++++
4 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cd08bf2c1a726b6..6ef56699379b4ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2493,6 +2493,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ const LLT WideEltTy = WideTy.getElementType();
+
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
if (TypeIdx == 1) {
Observer.changingInstr(MI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 00424f23e96d7d9..c50e87a101ba8a5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -718,8 +718,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
- .clampMinNumElements(0, s16, 4)
- .clampMaxNumElements(0, s16, 8);
+ .widenVectorEltsToVectorMinSize(0, 64);
getActionDefinitionsBuilder(G_BUILD_VECTOR)
.legalFor({{v8s8, s8},
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
index 26db18bd611a57c..426af02668e05b2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
@@ -4,21 +4,24 @@
name: pr63826
body: |
bb.0:
+ liveins: $d0, $w0
; CHECK-LABEL: name: pr63826
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[DEF]](s16), [[DEF]](s16)
- ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C]](s16), [[C1]](s32)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
- ; CHECK-NEXT: $w0 = COPY [[UV2]](<2 x s16>)
- %0:_(<2 x s16>) = COPY $w0
- %1:_(s16) = G_CONSTANT i16 1
- %2:_(s32) = G_CONSTANT i32 42
- %4:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0(<2 x s16>), %1(s16), %2(s32)
- $w0 = COPY %4(<2 x s16>)
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s32), [[C]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[IVEC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %3:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s8>) = G_TRUNC %3(<2 x s32>)
+ %6:_(s32) = G_CONSTANT i32 0
+ %5:_(s8) = G_CONSTANT i8 1
+ %4:_(<2 x s8>) = G_INSERT_VECTOR_ELT %1, %5(s8), %6(s32)
+ %7:_(<2 x s32>) = G_ANYEXT %4(<2 x s8>)
+ $d0 = COPY %7(<2 x s32>)
+ RET_ReallyLR implicit $d0
...
---
name: v8s8
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
index e1df07c93ebf1f2..8ffa419c602eb31 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
@@ -155,3 +155,15 @@ bb:
store i64 %tmp4, ptr %p
ret i64 %out
}
+
+define <2 x i16> @pr63826(<2 x i16> %vec) {
+; CHECK-LABEL: pr63826:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: mov.s v0[0], w8
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %vec1 = insertelement <2 x i16> %vec, i16 1, i32 0
+ ret <2 x i16> %vec1
+}
>From 6e3eea02666ba29a2751bb2589e355757a6d781d Mon Sep 17 00:00:00 2001
From: Zhongyunde <zhongyunde at huawei.com>
Date: Sat, 9 Sep 2023 12:59:33 +0800
Subject: [PATCH 2/2] [LAA] Improve the output remark for LoopVectorize
Don't report 'Use #pragma loop distribute(enable) to allow loop
distribution' when #pragma clang loop distribute(enable) is already present.
Fixes https://github.com/llvm/llvm-project/issues/64637
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 4dd150492453f72..8a779ac9fb94f64 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2505,12 +2505,24 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
// Emit remark for first unsafe dependence
+ bool HasForcedDistribution = false;
+ std::optional<const MDOperand *> Value =
+ findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
+ if (Value) {
+ const MDOperand *Op = *Value;
+ assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
+ HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
+ }
+
+ const std::string Info =
+ HasForcedDistribution
+ ? "unsafe dependent memory operations in loop."
+ : "unsafe dependent memory operations in loop. Use "
+ "#pragma loop distribute(enable) to allow loop distribution "
+ "to attempt to isolate the offending operations into a separate "
+ "loop";
OptimizationRemarkAnalysis &R =
- recordAnalysis("UnsafeDep", Dep.getDestination(*this))
- << "unsafe dependent memory operations in loop. Use "
- "#pragma loop distribute(enable) to allow loop distribution "
- "to attempt to isolate the offending operations into a separate "
- "loop";
+ recordAnalysis("UnsafeDep", Dep.getDestination(*this)) << Info;
switch (Dep.Type) {
case MemoryDepChecker::Dependence::NoDep:
More information about the llvm-commits
mailing list