[llvm] [CGP] Bail out if (Base|Scaled)Reg does not dominate insert point. (PR #142949)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 6 03:03:22 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/142949
>From 8d72e76354ddc18d0f7968583b05a93a624a8502 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 5 Jun 2025 12:36:01 +0100
Subject: [PATCH 1/2] [CGP] Bail out if (Base|Scaled)Reg does not dominate
insert point.
(Base|Scaled)Reg may not dominate the chosen insert point if there are
multiple uses of the address. Bail out if that's the case; otherwise we
would generate invalid IR.
In some cases, we could probably adjust the insert point or hoist the
(Base|Scaled)Reg instead of bailing out.
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 11 ++++-
.../X86/sink-addrmode-reg-does-not-geps.ll | 48 +++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 822ed6283117c..222f0f37fed50 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5945,8 +5945,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// The current BB may be optimized multiple times, we can't guarantee the
// reuse of Addr happens later, call findInsertPos to find an appropriate
// insert position.
- IRBuilder<> Builder(MemoryInst->getParent(),
- findInsertPos(Addr, MemoryInst, SunkAddr));
+ auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
+
+ // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
+ if (!SunkAddr &&
+ ((AddrMode.BaseReg && !DT->dominates(AddrMode.BaseReg, &*InsertPos)) ||
+ (AddrMode.ScaledReg && !DT->dominates(AddrMode.ScaledReg, &*InsertPos))))
+ return Modified;
+
+ IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
new file mode 100644
index 0000000000000..38598205c54f9
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
+
+
+target triple = "x86_64-unknown-linux"
+
+declare i1 @cond(float)
+
+define void @test(ptr %src) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[SUNKADDR2:%.*]] = mul i64 [[IV_NEXT]], 2
+; CHECK-NEXT: [[SUNKADDR3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR2]]
+; CHECK-NEXT: [[SUNKADDR4:%.*]] = getelementptr i8, ptr [[SUNKADDR3]], i64 6
+; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[SUNKADDR4]], align 4
+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 2
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR]]
+; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[SUNKADDR1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond(float [[L_0]])
+; CHECK-NEXT: [[C:%.*]] = call i1 @cond(float [[L_1]])
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+bb:
+ %gep.base = getelementptr i8, ptr %src, i64 8
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
+ %iv.shl = shl i64 %iv, 1
+ %gep.shl = getelementptr i8, ptr %gep.base, i64 %iv.shl
+ %gep.sub = getelementptr i8, ptr %gep.shl, i64 -8
+ %iv.next = add i64 %iv, 1
+ %l.0 = load float, ptr %gep.shl, align 4
+ %l.1 = load float, ptr %gep.sub, align 4
+ call i1 @cond(float %l.0)
+ %c = call i1 @cond(float %l.1)
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret void
+}
>From dc2eccf3e28b18134896dcb0f33fec4212c78205 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 6 Jun 2025 10:30:22 +0100
Subject: [PATCH 2/2] !fixup use getDT, fix crash if CFG is modified.
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 10 +++---
.../X86/sink-addrmode-reg-does-not-geps.ll | 34 +++++++++++++++++--
2 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 222f0f37fed50..32348a899683d 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5948,10 +5948,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
// TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
- if (!SunkAddr &&
- ((AddrMode.BaseReg && !DT->dominates(AddrMode.BaseReg, &*InsertPos)) ||
- (AddrMode.ScaledReg && !DT->dominates(AddrMode.ScaledReg, &*InsertPos))))
- return Modified;
+ if (!SunkAddr) {
+ auto &DT = getDT(*MemoryInst->getFunction());
+ if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
+ (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
+ return Modified;
+ }
IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
index 38598205c54f9..1640bafbd0bf9 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
@@ -1,13 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
-
target triple = "x86_64-unknown-linux"
declare i1 @cond(float)
-define void @test(ptr %src) {
-; CHECK-LABEL: define void @test(
+define void @scaled_reg_does_not_dominate_insert_point(ptr %src) {
+; CHECK-LABEL: define void @scaled_reg_does_not_dominate_insert_point(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -46,3 +45,32 @@ loop:
exit:
ret void
}
+
+define void @check_dt_after_modifying_cfg(ptr %dst, i64 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: define void @check_dt_after_modifying_cfg(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[OFFSET:%.*]] = lshr i64 [[X]], 2
+; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i8 [[Z]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL_FROZEN]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[SELECT_END:.*]], label %[[SELECT_FALSE_SINK:.*]]
+; CHECK: [[SELECT_FALSE_SINK]]:
+; CHECK-NEXT: [[SMIN:%.*]] = tail call i8 @llvm.smin.i8(i8 [[Y]], i8 0)
+; CHECK-NEXT: br label %[[SELECT_END]]
+; CHECK: [[SELECT_END]]:
+; CHECK-NEXT: [[SEL:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[SMIN]], %[[SELECT_FALSE_SINK]] ]
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET]]
+; CHECK-NEXT: store i8 [[SEL]], ptr [[SUNKADDR]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %offset = lshr i64 %x, 2
+ %gep.dst = getelementptr i8, ptr %dst, i64 %offset
+ %smin = tail call i8 @llvm.smin.i8(i8 %y, i8 0)
+ %cmp = icmp slt i8 %z, 0
+ %sel = select i1 %cmp, i8 0, i8 %smin
+ store i8 %sel, ptr %gep.dst, align 1
+ ret void
+}
+
+declare i8 @llvm.smin.i8(i8, i8) #0
More information about the llvm-commits
mailing list