[llvm] a323dfc - Don't sink ptrtoint/inttoptr sequences into non-noop addrspacecasts.

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 16 07:56:47 PDT 2022


Author: Tim Besard
Date: 2022-07-16T10:56:42-04:00
New Revision: a323dfc0152a11c6934695b1ae67920dae9fad9b

URL: https://github.com/llvm/llvm-project/commit/a323dfc0152a11c6934695b1ae67920dae9fad9b
DIFF: https://github.com/llvm/llvm-project/commit/a323dfc0152a11c6934695b1ae67920dae9fad9b.diff

LOG: Don't sink ptrtoint/inttoptr sequences into non-noop addrspacecasts.

In https://reviews.llvm.org/D30114, support for mismatching address
spaces was introduced to CodeGenPrepare's optimizeMemoryInst, using
addrspacecast as it was argued that only no-op addrspacecasts would be
considered when constructing the address mode. However, by doing
inttoptr/ptrtoint, it's possible to get CGP to emit an addrspace
that's not actually no-op, introducing a miscompilation:

define void @kernel(i8* %julia_ptr) {
  %intptr = ptrtoint i8* %julia_ptr to i64
  %ptr = inttoptr i64 %intptr to i32 addrspace(3)*

  br label %end
end:

  store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4
  ret void
}

Gets compiled to:

define void @kernel(i8* %julia_ptr) {
end:
  %0 = addrspacecast i8* %julia_ptr to i32 addrspace(3)*
  store atomic i32 1, i32 addrspace(3)* %0 unordered, align 4
  ret void
}

In the case of NVPTX, this introduces a cvta.to.shared, whereas
leaving out the %end block and branch doesn't trigger this
optimization. This results in illegal memory accesses as seen in
https://github.com/JuliaGPU/CUDA.jl/issues/558

In this change, I introduced a check before doing the pointer cast
that verifies address spaces are the same. If not, it emits a
ptrtoint/inttoptr combination to get a no-op cast between address
spaces. I decided against disallowing ptrtoint/inttoptr with
non-default AS in matchOperationAddr, because now its still possible
to look through multiple sequences of them that ultimately do not
result in a address space mismatch (i.e. the second lit test).

Added: 
    llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6778af22f532..d80123c44074 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5227,18 +5227,31 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
 
   Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
   if (SunkAddr) {
     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
                       << " for " << *MemoryInst << "\n");
-    if (SunkAddr->getType() != Addr->getType())
-      SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+    if (SunkAddr->getType() != Addr->getType()) {
+      if (SunkAddr->getType()->getPointerAddressSpace() !=
+              Addr->getType()->getPointerAddressSpace() &&
+          !DL->isNonIntegralPointerType(Addr->getType())) {
+        // There are two reasons the address spaces might not match: a no-op
+        // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+        // ptrtoint/inttoptr pair to ensure we match the original semantics.
+        // TODO: allow bitcast between 
diff erent address space pointers with the
+        // same size.
+        SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+        SunkAddr =
+            Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+      } else
+        SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+    }
   } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
                                    SubtargetInfo->addrSinkUsingGEPs())) {
     // By default, we use the GEP-based method when AA is used later. This
     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
     LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
                       << " for " << *MemoryInst << "\n");
-    Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
 
     // First, find the pointer.
@@ -5361,8 +5374,21 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                      AddrMode.InBounds);
       }
 
-      if (SunkAddr->getType() != Addr->getType())
-        SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+      if (SunkAddr->getType() != Addr->getType()) {
+        if (SunkAddr->getType()->getPointerAddressSpace() !=
+                Addr->getType()->getPointerAddressSpace() &&
+            !DL->isNonIntegralPointerType(Addr->getType())) {
+          // There are two reasons the address spaces might not match: a no-op
+          // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+          // ptrtoint/inttoptr pair to ensure we match the original semantics.
+          // TODO: allow bitcast between 
diff erent address space pointers with
+          // the same size.
+          SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+          SunkAddr =
+              Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+        } else
+          SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+      }
     }
   } else {
     // We'd require a ptrtoint/inttoptr down the line, which we can't do for

diff  --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll
new file mode 100644
index 000000000000..39e50241c9cc
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+
+; ptrtoint/inttoptr combinations can introduce semantically-meaningful address space casts
+; which we can't sink into an addrspacecast
+
+; CHECK-LABEL: @test
+define void @test(i8* %input_ptr) {
+  ; CHECK-LABEL: l1:
+  ; CHECK-NOT: addrspacecast
+  %intptr = ptrtoint i8* %input_ptr to i64
+  %ptr = inttoptr i64 %intptr to i32 addrspace(3)*
+
+  br label %l1
+l1:
+
+  store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4
+  ret void
+}
+
+
+; we still should be able to look through multiple sequences of inttoptr/ptrtoint
+
+; CHECK-LABEL: @test2
+define void @test2(i8* %input_ptr) {
+  ; CHECK-LABEL: l2:
+  ; CHECK: bitcast
+  ; CHECK-NEXT: store
+  %intptr = ptrtoint i8* %input_ptr to i64
+  %ptr = inttoptr i64 %intptr to i32 addrspace(3)*
+
+  %intptr2 = ptrtoint i32 addrspace(3)* %ptr to i64
+  %ptr2 = inttoptr i64 %intptr2 to i32*
+
+  br label %l2
+l2:
+
+  store atomic i32 1, i32* %ptr2 unordered, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list