[llvm-branch-commits] [llvm] release/22.x: [CGP][AArch64] Do not sink instructions that might read/write memory. (#176182) (PR #176773)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 19 08:07:53 PST 2026
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/176773
Backport a4975a80899c3a310e856386bd6d4c7cf8c21f79
Requested by: @davemgreen
From 2b3662ac4957ba391f7cc06030da7f3be1d50b2c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 18 Jan 2026 06:18:25 +0000
Subject: [PATCH] [CGP][AArch64] Do not sink instructions that might read/write
memory. (#176182)
The test case's call instruction was being sunk past the point where the
memory it accessed was valid. Add a check so that CGP does not try to sink
instructions that might be invalid to move.
Fixes #176095
(cherry picked from commit a4975a80899c3a310e856386bd6d4c7cf8c21f79)
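For readers skimming the backport, a minimal sketch of the new guard in
CodeGenPrepare::tryToSinkFreeOperands is shown below with surrounding logic
elided and explanatory comments added; the actual one-line hunk is in the
diff that follows.

  // Operands are only sunk into the user's block when moving them cannot
  // change observable behaviour.
  for (Use *U : reverse(OpsToSink)) {
    auto *UI = cast<Instruction>(U->get());
    // Skip PHIs, and skip anything that may write (side effects) or read
    // memory: sinking such an instruction past stores, calls, or a
    // lifetime.end could let it access memory that is no longer valid.
    if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
      continue;
    // ... remaining sinking logic unchanged ...
  }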
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +-
.../AArch64/sink-free-instructions.ll | 98 +++++++++++++++++--
2 files changed, 93 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 65b5965d99c18..6161f78182eef 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7943,7 +7943,7 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (isa<PHINode>(UI))
+ if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
continue;
if (UI->getParent() == TargetBB) {
if (InstOrdering[UI] < InstOrdering[InsertPoint])
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
index 0ccfd9c20c12e..00d093f60d558 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
@@ -142,12 +142,12 @@ entry:
if.then:
%s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
+ %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
ret <8 x i16> %vmull0
if.else:
%s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
+ %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
ret <8 x i16> %vmull1
}
@@ -174,12 +174,12 @@ entry:
if.then:
%s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
+ %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
ret <8 x i16> %vmull0
if.else:
%s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
+ %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
ret <8 x i16> %vmull1
}
@@ -294,12 +294,12 @@ entry:
if.then:
%s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
+ %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
ret <8 x i16> %vmull0
if.else:
%s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
- %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
+ %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
ret <8 x i16> %vmull1
}
@@ -1003,3 +1003,89 @@ entry:
%2 = sub <vscale x 8 x i16> %0, %1
ret <vscale x 8 x i16> %2
}
+
+declare range(i64 0, 65536) i64 @backsmith_pure_3(ptr dead_on_return readonly captures(none) %0, <8 x i8> %BS_ARG_1, i32 %BS_ARG_2)
+define i32 @dont_sink_calls(ptr %func_1_a) {
+; CHECK-LABEL: @dont_sink_calls(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca <16 x i16>, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BYVAL_TEMP]])
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[BYVAL_TEMP]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @backsmith_pure_3(ptr dead_on_return nonnull [[BYVAL_TEMP]], <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 10, i8 0, i8 0>, i32 0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BYVAL_TEMP]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[FUNC_1_A:%.*]], align 8
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[CLEANUP:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[VQADDQ_V_I:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[VQADDQ_V_I]], <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[VECINIT21:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[TMP2]], i64 0
+; CHECK-NEXT: [[VECINIT38:%.*]] = shufflevector <16 x i64> [[TMP3]], <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <16 x i64> [[VECINIT38]], [[VECINIT21]]
+; CHECK-NEXT: store <16 x i64> [[MUL]], ptr [[FUNC_1_A]], align 128
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %byval-temp = alloca <16 x i16>, align 16
+ call void @llvm.lifetime.start.p0(ptr nonnull %byval-temp)
+ store <16 x i16> zeroinitializer, ptr %byval-temp, align 16
+ %call4 = call i64 @backsmith_pure_3(ptr dead_on_return nonnull %byval-temp, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 10, i8 0, i8 0>, i32 0)
+ call void @llvm.lifetime.end.p0(ptr nonnull %byval-temp)
+ %0 = load i64, ptr %func_1_a, align 8
+ %tobool.not = icmp eq i64 %0, 0
+ br i1 %tobool.not, label %if.end, label %cleanup
+
+if.end: ; preds = %entry
+ %vqaddq_v.i = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
+ %1 = shufflevector <16 x i8> %vqaddq_v.i, <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ %vecinit21 = zext <16 x i8> %1 to <16 x i64>
+ %2 = insertelement <16 x i64> poison, i64 %call4, i64 0
+ %vecinit38 = shufflevector <16 x i64> %2, <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ %mul = mul nuw nsw <16 x i64> %vecinit38, %vecinit21
+ store <16 x i64> %mul, ptr %func_1_a
+ br label %cleanup
+
+cleanup: ; preds = %entry, %if.end
+ ret i32 0
+}
+
+define i32 @dont_sink_loads(i1 %c, ptr %p1, ptr %p2) {
+; CHECK-LABEL: @dont_sink_loads(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 8, !range [[RNG0:![0-9]+]], !noundef [[META1:![0-9]+]]
+; CHECK-NEXT: store i64 0, ptr [[P2:%.*]], align 8
+; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_END:%.*]], label [[CLEANUP:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[VQADDQ_V_I:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VQADDQ_V_I]], <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[VECINIT21:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[VECINIT38:%.*]] = shufflevector <16 x i64> [[TMP2]], <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <16 x i64> [[VECINIT38]], [[VECINIT21]]
+; CHECK-NEXT: store <16 x i64> [[MUL]], ptr [[P1]], align 128
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %call4 = load i64, ptr %p1, !range !0, !noundef !{}
+ store i64 0, ptr %p2
+ br i1 %c, label %if.end, label %cleanup
+
+if.end: ; preds = %entry
+ %vqaddq_v.i = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
+ %1 = shufflevector <16 x i8> %vqaddq_v.i, <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ %vecinit21 = zext <16 x i8> %1 to <16 x i64>
+ %2 = insertelement <16 x i64> poison, i64 %call4, i64 0
+ %vecinit38 = shufflevector <16 x i64> %2, <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ %mul = mul nuw nsw <16 x i64> %vecinit38, %vecinit21
+ store <16 x i64> %mul, ptr %p1
+ br label %cleanup
+
+cleanup: ; preds = %entry, %if.end
+ ret i32 0
+}
+!0 = !{i64 0, i64 128}