[llvm] AlignmentFromAssumptions should not track the load result users (PR #73370)

Fri Nov 24 12:41:18 PST 2023

https://github.com/alex-t created https://github.com/llvm/llvm-project/pull/73370

AlignmentFromAssumptions uses SCEV to update the load/store alignment. It tracks down the use-def chains for the pointer which it takes from the assumption cache until it reaches the load or store instruction. It mistakenly adds to the worklist the users of the load result irrespective of the fact that the load result has no connection with the original pointer, moreover, it is not a pointer at all in most cases.
Thus the def-use chain contains irrelevant load users. When it is a store instruction the algorithm attempts to adjust its alignment to the alignment of the original pointer. The problem appears when the load and store memory operand pointers belong to different address spaces and possibly have different sizes.
The 4bf015c035e4e5b63c7222dfb15ff274a5ed905c was an attempt to address a similar problem. The truncation or zero extension was added to make pointers the same size. That looks strange to me because the zero extension of the pointer is not legal. The test in the 4bf015c035e4e5b63c7222dfb15ff274a5ed905c does not work any longer as for the explicit address spaces conversion the addrspacecast is generated.
Summarize:
1. For the alloca to global address spaces conversion addrspacecasts are used, so the code added by the 4bf015c035e4e5b63c7222dfb15ff274a5ed905c is no longer functional.
2. The AlignmentFromAssumptions algorithm should not add the load users to the worklist as they have nothing to do with the original pointer.

>From 09bceafc5dbff302157c1affe945cf9c3325fee2 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 24 Nov 2023 21:11:35 +0100
Subject: [PATCH] AlignmentFromAssumptions should not track the load result
 users

---
 .../Scalar/AlignmentFromAssumptions.cpp       | 20 +++++++++----------
 .../alignment-from-assumptions_dont_crash.ll  | 16 +++++++++++++++
 2 files changed, 26 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 63b7903ef955d9b..905ff2e80cd111a 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
                              const SCEV *OffSCEV, Value *Ptr,
                              ScalarEvolution *SE) {
   const SCEV *PtrSCEV = SE->getSCEV(Ptr);
-  // On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
-  // (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
-  // may disagree. Trunc/extend so they agree.
-  PtrSCEV = SE->getTruncateOrZeroExtend(
-      PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
+
   const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
   if (isa<SCEVCouldNotCompute>(DiffSCEV))
     return Align(1);
@@ -216,6 +212,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
   }
 
   while (!WorkList.empty()) {
+    bool AddUsers = true;
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
@@ -226,6 +223,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
+      // The user of a Load uses data - not a pointer!
+      AddUsers = false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
         continue;
@@ -267,11 +266,12 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
     Visited.insert(J);
-    for (User *UJ : J->users()) {
-      Instruction *K = cast<Instruction>(UJ);
-      if (!Visited.count(K))
-        WorkList.push_back(K);
-    }
+    if (AddUsers)
+      for (User *UJ : J->users()) {
+        Instruction *K = cast<Instruction>(UJ);
+        if (!Visited.count(K))
+          WorkList.push_back(K);
+      }
   }
 
   return true;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
new file mode 100644
index 000000000000000..107d677cdbc271c
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -0,0 +1,16 @@
+; Test that we don't crash.
+; RUN: opt < %s -passes=alignment-from-assumptions -S
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+declare void @llvm.assume(i1 noundef)