[llvm] r298172 - [AMDGPU] Add address space based alias analysis pass
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 17 16:56:59 PDT 2017
Author: rampitec
Date: Fri Mar 17 18:56:58 2017
New Revision: 298172
URL: http://llvm.org/viewvc/llvm-project?rev=298172&view=rev
Log:
[AMDGPU] Add address space based alias analysis pass
This is direct port of HSAILAliasAnalysis pass, just cleaned for
style and renamed.
Differential Revision: https://reviews.llvm.org/D31103
Added:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
llvm/trunk/test/CodeGen/AMDGPU/vectorize-global-local.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Fri Mar 17 18:56:58 2017
@@ -119,6 +119,9 @@ extern char &SIDebuggerInsertNopsID;
void initializeSIInsertWaitsPass(PassRegistry&);
extern char &SIInsertWaitsID;
+ImmutablePass *createAMDGPUAAWrapperPass();
+void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+
Target &getTheAMDGPUTarget();
Target &getTheGCNTarget();
Added: llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp?rev=298172&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp Fri Mar 17 18:56:58 2017
@@ -0,0 +1,117 @@
+//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the AMGPU address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-aa"
+
+// Register this pass...
+char AMDGPUAAWrapperPass::ID = 0;
+INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
+ "AMDGPU Address space based Alias Analysis", false, true)
+
+ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
+ return new AMDGPUAAWrapperPass();
+}
+
+void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ // This array is indexed by the AMDGPUAS::AddressSpaces
+ // enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS
+ // see "llvm/Transforms/AMDSPIRUtils.h"
+ static const AliasResult ASAliasRules[5][5] = {
+ /* Private Global Constant Group Flat */
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
+ /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
+ /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
+ /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
+ };
+ unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
+ unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
+ if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
+ asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
+ report_fatal_error("Pointer address space out of range");
+
+ AliasResult Result = ASAliasRules[asA][asB];
+ if (Result == NoAlias) return Result;
+
+ if (isa<Argument>(LocA.Ptr) && isa<Argument>(LocB.Ptr)) {
+ Type *T1 = cast<PointerType>(LocA.Ptr->getType())->getElementType();
+ Type *T2 = cast<PointerType>(LocB.Ptr->getType())->getElementType();
+
+ if ((T1->isVectorTy() && !T2->isVectorTy()) ||
+ (T2->isVectorTy() && !T1->isVectorTy()))
+ return NoAlias;
+ }
+ // Forward the query to the next alias analysis.
+ return AAResultBase::alias(LocA, LocB);
+}
+
+bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
+
+ if (Base->getType()->getPointerAddressSpace() ==
+ AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
+ return true;
+ }
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ if (GV->isConstant())
+ return true;
+ } else if (const Argument *Arg = dyn_cast<Argument>(Base)) {
+ const Function *F = Arg->getParent();
+
+ // Only assume constant memory for arguments on kernels.
+ switch (F->getCallingConv()) {
+ default:
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ break;
+ }
+
+ unsigned ArgNo = Arg->getArgNo();
+ /* On an argument, ReadOnly attribute indicates that the function does
+ not write through this pointer argument, even though it may write
+ to the memory that the pointer points to.
+ On an argument, ReadNone attribute indicates that the function does
+ not dereference that pointer argument, even though it may read or write
+ the memory that the pointer points to if accessed through other pointers.
+ */
+ if (F->getAttributes().hasAttribute(ArgNo + 1, Attribute::NoAlias) &&
+ (F->getAttributes().hasAttribute(ArgNo + 1, Attribute::ReadNone) ||
+ F->getAttributes().hasAttribute(ArgNo + 1, Attribute::ReadOnly))) {
+ return true;
+ }
+ }
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+}
Added: llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h?rev=298172&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h (added)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h Fri Mar 17 18:56:58 2017
@@ -0,0 +1,86 @@
+//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the AMGPU address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A simple AA result that uses TBAA metadata to answer queries.
+class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
+ friend AAResultBase<AMDGPUAAResult>;
+
+ const DataLayout &DL;
+
+public:
+ explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
+ AMDGPUAAResult(AMDGPUAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL){}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+
+private:
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+ bool PathAliases(const MDNode *A, const MDNode *B) const;
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
+ friend AnalysisInfoMixin<AMDGPUAA>;
+ static char PassID;
+
+public:
+ typedef AMDGPUAAResult Result;
+
+ AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
+ return AMDGPUAAResult(F.getParent()->getDataLayout());
+ }
+};
+
+/// Legacy wrapper pass to provide the AMDGPUAAResult object.
+class AMDGPUAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<AMDGPUAAResult> Result;
+
+public:
+ static char ID;
+
+ AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+ initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ AMDGPUAAResult &getResult() { return *Result; }
+ const AMDGPUAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override {
+ Result.reset(new AMDGPUAAResult(M.getDataLayout()));
+ return false;
+ }
+ bool doFinalization(Module &M) override {
+ Result.reset();
+ return false;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+}
+#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Mar 17 18:56:58 2017
@@ -15,6 +15,7 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
@@ -93,6 +94,11 @@ static cl::opt<bool> InternalizeSymbols(
cl::init(false),
cl::Hidden);
+// Enable address space based alias analysis
+static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
+ cl::desc("Enable AMDGPU Alias Analysis"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -119,6 +125,7 @@ extern "C" void LLVMInitializeAMDGPUTarg
initializeSIInsertSkipsPass(*PR);
initializeSIDebuggerInsertNopsPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
+ initializeAMDGPUAAWrapperPassPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -507,6 +514,15 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createSROAPass());
addStraightLineScalarOptimizationPasses();
+
+ if (EnableAMDGPUAliasAnalysis) {
+ addPass(createAMDGPUAAWrapperPass());
+ addPass(createExternalAAWrapperPass([](Pass &P, Function &,
+ AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }));
+ }
}
TargetPassConfig::addIRPasses();
Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Fri Mar 17 18:56:58 2017
@@ -36,6 +36,7 @@ endif()
add_llvm_target(AMDGPUCodeGen
AMDILCFGStructurizer.cpp
+ AMDGPUAliasAnalysis.cpp
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll Fri Mar 17 18:56:58 2017
@@ -1,9 +1,9 @@
-; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
-; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
-; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=HSAOPT -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=NOHSAOPT -check-prefix=OPT %s
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll Fri Mar 17 18:56:58 2017
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
; GCN-LABEL: {{^}}s_insertelement_v2i16_0:
; GCN: s_load_dword [[VEC:s[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Fri Mar 17 18:56:58 2017
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll?rev=298172&r1=298171&r2=298172&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll Fri Mar 17 18:56:58 2017
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
@sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef
Added: llvm/trunk/test/CodeGen/AMDGPU/vectorize-global-local.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vectorize-global-local.ll?rev=298172&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vectorize-global-local.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/vectorize-global-local.ll Fri Mar 17 18:56:58 2017
@@ -0,0 +1,80 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+; CHECK-DAG: flat_load_dwordx4
+; CHECK-DAG: flat_load_dwordx4
+; CHECK-DAG: flat_load_dwordx4
+; CHECK-DAG: flat_load_dwordx4
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+; CHECK-DAG: ds_write2_b32
+
+define void @vectorize_global_local(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(3)* nocapture %arg1) {
+bb:
+ %tmp = load i32, i32 addrspace(1)* %arg, align 4
+ store i32 %tmp, i32 addrspace(3)* %arg1, align 4
+ %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
+ %tmp3 = load i32, i32 addrspace(1)* %tmp2, align 4
+ %tmp4 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 1
+ store i32 %tmp3, i32 addrspace(3)* %tmp4, align 4
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
+ %tmp7 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 2
+ store i32 %tmp6, i32 addrspace(3)* %tmp7, align 4
+ %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
+ %tmp9 = load i32, i32 addrspace(1)* %tmp8, align 4
+ %tmp10 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 3
+ store i32 %tmp9, i32 addrspace(3)* %tmp10, align 4
+ %tmp11 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
+ %tmp12 = load i32, i32 addrspace(1)* %tmp11, align 4
+ %tmp13 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 4
+ store i32 %tmp12, i32 addrspace(3)* %tmp13, align 4
+ %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
+ %tmp15 = load i32, i32 addrspace(1)* %tmp14, align 4
+ %tmp16 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 5
+ store i32 %tmp15, i32 addrspace(3)* %tmp16, align 4
+ %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
+ %tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
+ %tmp19 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 6
+ store i32 %tmp18, i32 addrspace(3)* %tmp19, align 4
+ %tmp20 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
+ %tmp21 = load i32, i32 addrspace(1)* %tmp20, align 4
+ %tmp22 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 7
+ store i32 %tmp21, i32 addrspace(3)* %tmp22, align 4
+ %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
+ %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
+ %tmp25 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 8
+ store i32 %tmp24, i32 addrspace(3)* %tmp25, align 4
+ %tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 9
+ %tmp27 = load i32, i32 addrspace(1)* %tmp26, align 4
+ %tmp28 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 9
+ store i32 %tmp27, i32 addrspace(3)* %tmp28, align 4
+ %tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 10
+ %tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
+ %tmp31 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 10
+ store i32 %tmp30, i32 addrspace(3)* %tmp31, align 4
+ %tmp32 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 11
+ %tmp33 = load i32, i32 addrspace(1)* %tmp32, align 4
+ %tmp34 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 11
+ store i32 %tmp33, i32 addrspace(3)* %tmp34, align 4
+ %tmp35 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12
+ %tmp36 = load i32, i32 addrspace(1)* %tmp35, align 4
+ %tmp37 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 12
+ store i32 %tmp36, i32 addrspace(3)* %tmp37, align 4
+ %tmp38 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 13
+ %tmp39 = load i32, i32 addrspace(1)* %tmp38, align 4
+ %tmp40 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 13
+ store i32 %tmp39, i32 addrspace(3)* %tmp40, align 4
+ %tmp41 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 14
+ %tmp42 = load i32, i32 addrspace(1)* %tmp41, align 4
+ %tmp43 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 14
+ store i32 %tmp42, i32 addrspace(3)* %tmp43, align 4
+ %tmp44 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 15
+ %tmp45 = load i32, i32 addrspace(1)* %tmp44, align 4
+ %tmp46 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 15
+ store i32 %tmp45, i32 addrspace(3)* %tmp46, align 4
+ ret void
+}
More information about the llvm-commits
mailing list