[llvm] a323dfc - Don't sink ptrtoint/inttoptr sequences into non-noop addrspacecasts.
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 16 07:56:47 PDT 2022
Author: Tim Besard
Date: 2022-07-16T10:56:42-04:00
New Revision: a323dfc0152a11c6934695b1ae67920dae9fad9b
URL: https://github.com/llvm/llvm-project/commit/a323dfc0152a11c6934695b1ae67920dae9fad9b
DIFF: https://github.com/llvm/llvm-project/commit/a323dfc0152a11c6934695b1ae67920dae9fad9b.diff
LOG: Don't sink ptrtoint/inttoptr sequences into non-noop addrspacecasts.
In https://reviews.llvm.org/D30114, support for mismatching address
spaces was introduced to CodeGenPrepare's optimizeMemoryInst, using
addrspacecast; it was argued that only no-op addrspacecasts would be
considered when constructing the address mode. However, by going
through a ptrtoint/inttoptr pair, it's possible to get CGP to emit an
addrspacecast that is not actually a no-op, introducing a
miscompilation:
define void @kernel(i8* %julia_ptr) {
%intptr = ptrtoint i8* %julia_ptr to i64
%ptr = inttoptr i64 %intptr to i32 addrspace(3)*
br label %end
end:
store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4
ret void
}
Gets compiled to:
define void @kernel(i8* %julia_ptr) {
end:
%0 = addrspacecast i8* %julia_ptr to i32 addrspace(3)*
store atomic i32 1, i32 addrspace(3)* %0 unordered, align 4
ret void
}
In the case of NVPTX, this introduces a cvta.to.shared instruction,
whereas leaving out the %end block and branch doesn't trigger the
optimization. This results in illegal memory accesses, as seen in
https://github.com/JuliaGPU/CUDA.jl/issues/558
In this change, I introduce a check before doing the pointer cast
that verifies the address spaces match. If they don't, it emits a
ptrtoint/inttoptr combination, which matches the original semantics
instead of introducing a real addrspacecast. I decided against
disallowing ptrtoint/inttoptr with non-default AS in
matchOperationAddr, because it is still possible to look through
multiple sequences of them that ultimately do not result in an
address space mismatch (see the second lit test).
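For illustration, with this patch the kernel above is expected to keep
its ptrtoint/inttoptr semantics after sinking instead of gaining an
addrspacecast. A rough sketch of the expected output (value names are
illustrative, not the exact names CGP picks):
define void @kernel(i8* %julia_ptr) {
end:
  %sunkaddr = ptrtoint i8* %julia_ptr to i64
  %sunkaddr1 = inttoptr i64 %sunkaddr to i32 addrspace(3)*
  store atomic i32 1, i32 addrspace(3)* %sunkaddr1 unordered, align 4
  ret void
}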
Added:
llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll
Modified:
llvm/lib/CodeGen/CodeGenPrepare.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6778af22f532..d80123c44074 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5227,18 +5227,31 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
- if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ if (SunkAddr->getType() != Addr->getType()) {
+ if (SunkAddr->getType()->getPointerAddressSpace() !=
+ Addr->getType()->getPointerAddressSpace() &&
+ !DL->isNonIntegralPointerType(Addr->getType())) {
+ // There are two reasons the address spaces might not match: a no-op
+ // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+ // ptrtoint/inttoptr pair to ensure we match the original semantics.
+ // TODO: allow bitcast between different address space pointers with the
+ // same size.
+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+ SunkAddr =
+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+ } else
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ }
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
SubtargetInfo->addrSinkUsingGEPs())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
- Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *ResultPtr = nullptr, *ResultIndex = nullptr;
// First, find the pointer.
@@ -5361,8 +5374,21 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrMode.InBounds);
}
- if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ if (SunkAddr->getType() != Addr->getType()) {
+ if (SunkAddr->getType()->getPointerAddressSpace() !=
+ Addr->getType()->getPointerAddressSpace() &&
+ !DL->isNonIntegralPointerType(Addr->getType())) {
+ // There are two reasons the address spaces might not match: a no-op
+ // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+ // ptrtoint/inttoptr pair to ensure we match the original semantics.
+ // TODO: allow bitcast between different address space pointers with
+ // the same size.
+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+ SunkAddr =
+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+ } else
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ }
}
} else {
// We'd require a ptrtoint/inttoptr down the line, which we can't do for
diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll
new file mode 100644
index 000000000000..39e50241c9cc
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+
+; ptrtoint/inttoptr combinations can introduce semantically-meaningful address space casts
+; which we can't sink into an addrspacecast
+
+; CHECK-LABEL: @test
+define void @test(i8* %input_ptr) {
+ ; CHECK-LABEL: l1:
+ ; CHECK-NOT: addrspacecast
+ %intptr = ptrtoint i8* %input_ptr to i64
+ %ptr = inttoptr i64 %intptr to i32 addrspace(3)*
+
+ br label %l1
+l1:
+
+ store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4
+ ret void
+}
+
+
+; we still should be able to look through multiple sequences of inttoptr/ptrtoint
+
+; CHECK-LABEL: @test2
+define void @test2(i8* %input_ptr) {
+ ; CHECK-LABEL: l2:
+ ; CHECK: bitcast
+ ; CHECK-NEXT: store
+ %intptr = ptrtoint i8* %input_ptr to i64
+ %ptr = inttoptr i64 %intptr to i32 addrspace(3)*
+
+ %intptr2 = ptrtoint i32 addrspace(3)* %ptr to i64
+ %ptr2 = inttoptr i64 %intptr2 to i32*
+
+ br label %l2
+l2:
+
+ store atomic i32 1, i32* %ptr2 unordered, align 4
+ ret void
+}