[llvm] r363588 - [AMDGPU] gfx1010 wavefrontsize intrinsic folding
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 10:57:50 PDT 2019
Author: rampitec
Date: Mon Jun 17 10:57:50 2019
New Revision: 363588
URL: http://llvm.org/viewvc/llvm-project?rev=363588&view=rev
Log:
[AMDGPU] gfx1010 wavefrontsize intrinsic folding
Differential Revision: https://reviews.llvm.org/D63206
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPULibCalls.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=363588&r1=363587&r2=363588&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Mon Jun 17 10:57:50 2019
@@ -53,7 +53,8 @@ FunctionPass *createSIMemoryLegalizerPas
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &,
+ const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULibCalls.cpp?rev=363588&r1=363587&r2=363588&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULibCalls.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULibCalls.cpp Mon Jun 17 10:57:50 2019
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/ADT/StringSet.h"
@@ -22,6 +23,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -29,6 +31,7 @@
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <vector>
#include <cmath>
@@ -65,6 +68,8 @@ private:
typedef llvm::AMDGPULibFunc FuncInfo;
+ const TargetMachine *TM;
+
// -fuse-native.
bool AllNative = false;
@@ -134,6 +139,9 @@ private:
// __read_pipe/__write_pipe
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
+ // llvm.amdgcn.wavefrontsize
+ bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
+
// Get insertion point at entry.
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
@@ -152,6 +160,8 @@ protected:
}
public:
+ AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
+
bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
void initNativeFuncs();
@@ -166,15 +176,16 @@ namespace {
class AMDGPUSimplifyLibCalls : public FunctionPass {
- AMDGPULibCalls Simplifier;
-
const TargetOptions Options;
+ AMDGPULibCalls Simplifier;
+
public:
static char ID; // Pass identification
- AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
- : FunctionPass(ID), Options(Opt) {
+ AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
+ const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), Options(Opt), Simplifier(TM) {
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -639,14 +650,6 @@ bool AMDGPULibCalls::fold(CallInst *CI,
// Ignore indirect calls.
if (Callee == 0) return false;
- FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo))
- return false;
-
- // Further check the number of arguments to see if they match.
- if (CI->getNumArgOperands() != FInfo.getNumArgs())
- return false;
-
BasicBlock *BB = CI->getParent();
LLVMContext &Context = CI->getParent()->getContext();
IRBuilder<> B(Context);
@@ -658,6 +661,21 @@ bool AMDGPULibCalls::fold(CallInst *CI,
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
B.setFastMathFlags(FPOp->getFastMathFlags());
+ switch (Callee->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::amdgcn_wavefrontsize:
+ return !EnablePreLink && fold_wavefrontsize(CI, B);
+ }
+
+ FuncInfo FInfo;
+ if (!parseFunctionName(Callee->getName(), &FInfo))
+ return false;
+
+ // Further check the number of arguments to see if they match.
+ if (CI->getNumArgOperands() != FInfo.getNumArgs())
+ return false;
+
if (TDOFold(CI, FInfo))
return true;
@@ -1371,6 +1389,29 @@ bool AMDGPULibCalls::fold_sincos(CallIns
return true;
}
+bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) { // Replaces call CI with a constant wavefront size; returns true only if it folded.
+ if (!TM) // TM is null when no TargetMachine was handed to the constructor.
+ return false;
+
+ StringRef CPU = TM->getTargetCPU();
+ StringRef Features = TM->getTargetFeatureString();
+ if ((CPU.empty() || CPU.equals_lower("generic")) && // CPU is unset or "generic" (case-insensitive) ...
+ (Features.empty() ||
+ Features.find_lower("wavefrontsize") == StringRef::npos)) // ... and no feature text mentions "wavefrontsize".
+ return false; // In that case the call is left untouched.
+
+ Function *F = CI->getParent()->getParent(); // Function that contains the call.
+ const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
+ unsigned N = ST.getWavefrontSize();
+
+ LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
+ << N << "\n");
+
+ CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N)); // Every user now reads the i32 constant N.
+ CI->eraseFromParent(); // The call instruction itself is deleted.
+ return true;
+}
+
// Get insertion point at entry.
BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
Function * Func = UI->getParent()->getParent();
@@ -1680,8 +1721,9 @@ bool AMDGPULibCalls::evaluateCall(CallIn
}
// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
- return new AMDGPUSimplifyLibCalls(Opt);
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
+ const TargetMachine *TM) {
+ return new AMDGPUSimplifyLibCalls(Opt, TM);
}
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=363588&r1=363587&r2=363588&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jun 17 10:57:50 2019
@@ -432,7 +432,7 @@ void AMDGPUTargetMachine::adjustPassMana
PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
PM.add(llvm::createAMDGPUUseNativeCallsPass());
if (LibCallSimplify)
- PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
+ PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt, this));
});
Builder.addExtension(
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll?rev=363588&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll Mon Jun 17 10:57:50 2019
@@ -0,0 +1,84 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+
+; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
+; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
+; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+WavefrontSize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
+; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+WavefrontSize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
+
+; GCN-LABEL: {{^}}fold_wavefrontsize:
+; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
+
+; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32
+; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64
+; GCN: store_dword v[{{[0-9:]+}}], [[V]]
+
+; OPT-W32: store i32 32, i32 addrspace(1)* %arg, align 4
+; OPT-W64: store i32 64, i32 addrspace(1)* %arg, align 4
+; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
+; OPT-WXX: store i32 %tmp, i32 addrspace(1)* %arg, align 4
+; OPT-NEXT: ret void
+
+define amdgpu_kernel void @fold_wavefrontsize(i32 addrspace(1)* nocapture %arg) { ; kernel that stores the raw intrinsic result
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 ; the call under test
+ store i32 %tmp, i32 addrspace(1)* %arg, align 4 ; result is written through %arg
+ ret void
+}
+
+; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize:
+; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(
+
+; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}}
+; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}}
+; GCN-NOT: cndmask
+; GCN: store_dword v[{{[0-9:]+}}], [[V]]
+
+; OPT-W32: store i32 1, i32 addrspace(1)* %arg, align 4
+; OPT-W64: store i32 2, i32 addrspace(1)* %arg, align 4
+; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
+; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
+; OPT-WXX: %tmp2 = select i1 %tmp1, i32 2, i32 1
+; OPT-WXX: store i32 %tmp2, i32 addrspace(1)* %arg
+; OPT-NEXT: ret void
+
+define amdgpu_kernel void @fold_and_optimize_wavefrontsize(i32 addrspace(1)* nocapture %arg) { ; kernel that stores a value selected from the intrinsic result
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 ; the call under test
+ %tmp1 = icmp ugt i32 %tmp, 32 ; true when the result is greater than 32 (unsigned)
+ %tmp2 = select i1 %tmp1, i32 2, i32 1 ; 2 if greater than 32, otherwise 1
+ store i32 %tmp2, i32 addrspace(1)* %arg ; selected value is written through %arg
+ ret void
+}
+
+; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize:
+; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(
+
+; OPT: bb:
+; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
+; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
+; OPT-WXX: bb3:
+; OPT-W64: store i32 1, i32 addrspace(1)* %arg, align 4
+; OPT-NEXT: ret void
+
+define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(i32 addrspace(1)* nocapture %arg) { ; kernel that branches on the intrinsic result
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 ; the call under test
+ %tmp1 = icmp ugt i32 %tmp, 32 ; true when the result is greater than 32 (unsigned)
+ br i1 %tmp1, label %bb2, label %bb3 ; the store in bb2 runs only when %tmp1 is true
+
+bb2: ; preds = %bb
+ store i32 1, i32 addrspace(1)* %arg, align 4 ; writes the constant 1 through %arg
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ ret void
+}
+
+declare i32 @llvm.amdgcn.wavefrontsize() #0
+
+attributes #0 = { nounwind readnone speculatable }
More information about the llvm-commits mailing list