[llvm] [AArch64] Don't try to sink and(load) (PR #122274)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 03:52:52 PST 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/122274
>From 6d45baf9fbecfdc217049ee8691a9f693c78d9ee Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 9 Jan 2025 12:34:01 +0000
Subject: [PATCH 1/2] [AArch64] Don't try to sink and(load)
If we sink the and in and(load), CGP can hoist it back again to the load,
getting into an infinite loop. This prevents sinking the and in this case.
Fixes #122074
---
.../AArch64/AArch64TargetTransformInfo.cpp | 5 +-
.../CodeGen/AArch64/aarch64-dup-ext-crash.ll | 46 +++++++++++++++++++
2 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 29ea098386cec1..932a6f9ce23fd2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5514,7 +5514,10 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
NumZExts++;
}
- Ops.push_back(&Insert->getOperandUse(1));
+ // And(Load) is excluded to prevent CGP getting stuck in a loop of sinking
+ // the And, just to hoist it again back to the load.
+ if (!match(OperandInstr, m_And(m_Load(m_Value()), m_Value())))
+ Ops.push_back(&Insert->getOperandUse(1));
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
index 95c54cd8b01511..60781466f0eb79 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
@@ -40,3 +40,49 @@ vector.body: ; preds = %vector.body, %vecto
store <2 x i32> %3, ptr %4, align 4
br label %vector.body
}
+
+; This test got stuck in a loop hoisting the and to the load, and sinking it back to the mull
+define i32 @dup_and_load(ptr %p, i1 %c) {
+; CHECK-LABEL: dup_and_load:
+; CHECK: // %bb.0: // %for.body.lr.ph
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: ldrb w0, [x0]
+; CHECK-NEXT: tbz w1, #0, .LBB1_3
+; CHECK-NEXT: // %bb.1: // %ph
+; CHECK-NEXT: dup v0.8h, w0
+; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: .LBB1_2: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr d1, [x8]
+; CHECK-NEXT: add w9, w9, #1
+; CHECK-NEXT: cmp w9, #100
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: stp q1, q2, [x8]
+; CHECK-NEXT: b.lt .LBB1_2
+; CHECK-NEXT: .LBB1_3: // %end
+; CHECK-NEXT: ret
+for.body.lr.ph:
+ %l = load i32, ptr %p
+ %conv314 = and i32 %l, 255
+ br i1 %c, label %ph, label %end
+
+ph:
+ %broadcast.splatinsert = insertelement <8 x i32> poison, i32 %conv314, i32 0
+ %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %iv = phi i32 [ 0, %ph ], [ %iv.next, %vector.body ]
+ %wide.load = load <8 x i8>, ptr %p, align 4
+ %0 = zext <8 x i8> %wide.load to <8 x i32>
+ %1 = mul <8 x i32> %broadcast.splat, %0
+ store <8 x i32> %1, ptr %p, align 4
+ %iv.next = add i32 %iv, 1
+ %e = icmp slt i32 %iv.next, 100
+ br i1 %e, label %vector.body, label %end
+
+end:
+ ret i32 %conv314
+}
>From e295a9f4a1b5c2b162bd0e8f37fcb3cd23616bf0 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 10 Jan 2025 11:51:32 +0000
Subject: [PATCH 2/2] Address some super nitpicky nits
---
llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
index 60781466f0eb79..478c1be8821f6c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
@@ -9,7 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
; here, only that this case no longer causes said crash.
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
; CHECK-LABEL: dupext_crashtest:
-; CHECK: // %bb.0: // %for.body.lr.ph
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: dup v0.2s, w0
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -18,7 +18,7 @@ define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: b .LBB0_1
-for.body.lr.ph:
+entry:
%conv314 = zext i32 %e to i64
br label %vector.memcheck
@@ -44,7 +44,7 @@ vector.body: ; preds = %vector.body, %vecto
; This test got stuck in a loop hoisting the and to the load, and sinking it back to the mull
define i32 @dup_and_load(ptr %p, i1 %c) {
; CHECK-LABEL: dup_and_load:
-; CHECK: // %bb.0: // %for.body.lr.ph
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: ldrb w0, [x0]
; CHECK-NEXT: tbz w1, #0, .LBB1_3
@@ -63,13 +63,13 @@ define i32 @dup_and_load(ptr %p, i1 %c) {
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %end
; CHECK-NEXT: ret
-for.body.lr.ph:
+entry:
%l = load i32, ptr %p
- %conv314 = and i32 %l, 255
+ %and255 = and i32 %l, 255
br i1 %c, label %ph, label %end
ph:
- %broadcast.splatinsert = insertelement <8 x i32> poison, i32 %conv314, i32 0
+ %broadcast.splatinsert = insertelement <8 x i32> poison, i32 %and255, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
br label %vector.body
@@ -84,5 +84,5 @@ vector.body: ; preds = %vector.body, %vecto
br i1 %e, label %vector.body, label %end
end:
- ret i32 %conv314
+ ret i32 %and255
}
More information about the llvm-commits
mailing list