[llvm] [AA] Support Running Target Specific AA before BasicAA (PR #125965)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 18 17:58:34 PST 2025


https://github.com/Chengjunp updated https://github.com/llvm/llvm-project/pull/125965

>From 7fd2a71ad2bf92821e586f5b35fd3ef3ffb0d15f Mon Sep 17 00:00:00 2001
From: chengjunp <chengjunp at nvidia.com>
Date: Wed, 19 Feb 2025 01:49:44 +0000
Subject: [PATCH] Move all target-specific AA before BasicAA and fix issues in
 FlattenCFG and AMDGPUAA

---
 llvm/lib/Analysis/AliasAnalysis.cpp           | 14 +--
 llvm/lib/Passes/PassBuilderPipelines.cpp      | 10 ++-
 .../lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp |  8 +-
 llvm/lib/Transforms/Utils/FlattenCFG.cpp      |  8 +-
 .../NVPTXAA/NVPTXAA_before_BasicAA.ll         | 11 +++
 llvm/test/Transforms/Util/flatten-cfg.ll      | 88 ++++++++++++++++++-
 6 files changed, 124 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll

diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index 20cdbb6320322..bbd440b9999b1 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -752,6 +752,14 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
   AAR.reset(
       new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)));
 
+  // If available, first run the callback that provides an external AA.
+  // Running a target-specific AA early can improve compile time by leveraging
+  // target-specific knowledge to quickly determine some alias results, thereby
+  // reducing the workload for BasicAA.
+  if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
+    if (WrapperPass->CB)
+      WrapperPass->CB(*this, F, *AAR);
+
   // BasicAA is always available for function analyses. Also, we add it first
   // so that it can trump TBAA results when it proves MustAlias.
   // FIXME: TBAA should have an explicit mode to support this and then we
@@ -769,12 +777,6 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
   if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
     AAR->addAAResult(WrapperPass->getResult());
 
-  // If available, run an external AA providing callback over the results as
-  // well.
-  if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
-    if (WrapperPass->CB)
-      WrapperPass->CB(*this, F, *AAR);
-
   // Analyses don't mutate the IR, so return false.
   return false;
 }
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 17710eb94b6de..1d7e296f7d9af 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -2202,6 +2202,12 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
   // The order in which these are registered determines their priority when
   // being queried.
 
+  // Add target-specific alias analyses. Running a target-specific AA early can
+  // improve compile time by leveraging target-specific knowledge to quickly
+  // determine some alias results, thereby reducing the workload for BasicAA.
+  if (TM)
+    TM->registerDefaultAliasAnalyses(AA);
+
   // First we register the basic alias analysis that provides the majority of
   // per-function local AA logic. This is a stateless, on-demand local set of
   // AA techniques.
@@ -2219,9 +2225,5 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
   if (EnableGlobalAnalyses)
     AA.registerModuleAnalysis<GlobalsAA>();
 
-  // Add target-specific alias analyses.
-  if (TM)
-    TM->registerDefaultAliasAnalyses(AA);
-
   return AA;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 8d3eac6868318..8102b8bbd9d7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -49,8 +49,12 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
 AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
                                   const MemoryLocation &LocB, AAQueryInfo &AAQI,
                                   const Instruction *) {
-  unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
-  unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
+  Type* TypeA = LocA.Ptr->getType();
+  Type* TypeB = LocB.Ptr->getType();
+  if (!TypeA->isPointerTy() || !TypeB->isPointerTy())
+    return AliasResult::MayAlias;
+  unsigned asA = TypeA->getPointerAddressSpace();
+  unsigned asB = TypeB->getPointerAddressSpace();
 
   if (!AMDGPU::addrspacesMayAlias(asA, asB))
     return AliasResult::NoAlias;
diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 16b4bb1981d8b..151ca06f3498a 100644
--- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -357,8 +357,12 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
       for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
         if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
           // Check alias with Head2.
-          if (!AA || !AA->isNoAlias(&*iter1, &*BI))
-            return false;
+          if (AA) {
+            MemoryLocation Loc1 = MemoryLocation::get(&*iter1);
+            MemoryLocation Loc2 = MemoryLocation::get(&*BI);
+            if (!AA->isNoAlias(Loc1, Loc2))
+              return false;
+          }
         }
       }
     }
diff --git a/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll b/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll
new file mode 100644
index 0000000000000..95e2d6ac148f3
--- /dev/null
+++ b/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll
@@ -0,0 +1,11 @@
+; RUN: opt -aa-pipeline=default -passes='require<aa>' -debug-pass-manager -disable-output -S < %s 2>&1 | FileCheck %s
+
+; Target-specific AA should run before BasicAA to reduce compile time
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK: Running analysis: NVPTXAA on foo
+; CHECK-NEXT: Running analysis: BasicAA on foo
+define void @foo(){
+entry:
+  ret void
+}
diff --git a/llvm/test/Transforms/Util/flatten-cfg.ll b/llvm/test/Transforms/Util/flatten-cfg.ll
index 038dcaa47419a..2a006000b0035 100644
--- a/llvm/test/Transforms/Util/flatten-cfg.ll
+++ b/llvm/test/Transforms/Util/flatten-cfg.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -passes=flatten-cfg -S < %s | FileCheck %s
+; RUN: opt -passes='require<aa>,flatten-cfg' -S < %s | FileCheck %s
 
 
 ; This test checks whether the pass completes without a crash.
@@ -309,3 +309,89 @@ if.then.y:
 exit:
   ret i1 %cmp.y
 }
+
+; Test that two if-regions are not merged when there's potential aliasing
+; between a store in the first if-region and a load in the second if-region's header
+define i32 @test_alias(i32 %a, i32 %b, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define i32 @test_alias
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 42, ptr [[P1]], align 4
+; CHECK-NEXT:    [[COND1:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT:    br i1 [[COND1]], label [[IF_THEN1:%.*]], label [[IF_END1:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    store i32 100, ptr [[P2]], align 4
+; CHECK-NEXT:    br label [[IF_END1]]
+; CHECK:       if.end1:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[P1]], align 4
+; CHECK-NEXT:    [[COND2:%.*]] = icmp eq i32 [[B]], 0
+; CHECK-NEXT:    br i1 [[COND2]], label [[IF_THEN2:%.*]], label [[IF_END2:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    store i32 100, ptr [[P2]], align 4
+; CHECK-NEXT:    br label [[IF_END2]]
+; CHECK:       if.end2:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  store i32 42, ptr %p1
+  %cond1 = icmp eq i32 %a, 0
+  br i1 %cond1, label %if.then1, label %if.end1
+
+if.then1:
+  store i32 100, ptr %p2  ; May alias with the load below
+  br label %if.end1
+
+if.end1:
+  %val = load i32, ptr %p1  ; This load prevents merging due to potential alias
+  %cond2 = icmp eq i32 %b, 0
+  br i1 %cond2, label %if.then2, label %if.end2
+
+if.then2:
+  store i32 100, ptr %p2
+  br label %if.end2
+
+if.end2:
+  ret i32 0
+}
+
+; Test that two if-regions are merged when there's no potential aliasing
+; between a store in the first if-region and a load in the second if-region's header
+define i32 @test_no_alias(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @test_no_alias
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    [[COND1:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr @g, align 4
+; CHECK-NEXT:    [[COND2:%.*]] = icmp eq i32 [[B]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[COND1]], [[COND2]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN2:%.*]], label [[IF_END2:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    store i32 100, ptr [[P]], align 4
+; CHECK-NEXT:    br label [[IF_END2]]
+; CHECK:       if.end2:
+; CHECK-NEXT:    ret i32 0
+;
+  entry:
+  %p = alloca i32
+  store i32 42, ptr %p
+  %cond1 = icmp eq i32 %a, 0
+  br i1 %cond1, label %if.then1, label %if.end1
+
+if.then1:
+  store i32 100, ptr %p  ; No alias with the load below
+  br label %if.end1
+
+if.end1:
+  %val = load i32, ptr @g
+  %cond2 = icmp eq i32 %b, 0
+  br i1 %cond2, label %if.then2, label %if.end2
+
+if.then2:
+  store i32 100, ptr %p
+  br label %if.end2
+
+if.end2:
+  ret i32 0
+}



More information about the llvm-commits mailing list