[llvm] cb7b661 - AMDGPU: Analyze divergence of inline asm
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 3 12:42:30 PST 2020
Author: Matt Arsenault
Date: 2020-02-03T12:42:16-08:00
New Revision: cb7b661d3d3b547eaa377bdff0a0c94ea9e5458b
URL: https://github.com/llvm/llvm-project/commit/cb7b661d3d3b547eaa377bdff0a0c94ea9e5458b
DIFF: https://github.com/llvm/llvm-project/commit/cb7b661d3d3b547eaa377bdff0a0c94ea9e5458b.diff
LOG: AMDGPU: Analyze divergence of inline asm
Added:
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/inline-asm.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index a255a49b26b6..514842b50647 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -578,8 +578,6 @@ int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
}
-
-
static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
@@ -606,6 +604,54 @@ static bool isArgPassedInSGPR(const Argument *A) {
}
}
+/// Analyze if the results of inline asm are divergent. If \p Indices is empty,
+/// this is analyzing the collective result of all output registers. Otherwise,
+/// this is only querying a specific result index if this returns multiple
+/// registers in a struct.
+bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
+ const CallInst *CI, ArrayRef<unsigned> Indices) const {
+ // TODO: Handle complex extract indices
+ if (Indices.size() > 1)
+ return true;
+
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+ ImmutableCallSite CS(CI);
+ TargetLowering::AsmOperandInfoVector TargetConstraints
+ = TLI->ParseConstraints(DL, ST->getRegisterInfo(), CS);
+
+ const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0];
+
+ int OutputIdx = 0;
+ for (auto &TC : TargetConstraints) {
+ if (TC.Type != InlineAsm::isOutput)
+ continue;
+
+ // Skip outputs we don't care about.
+ if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
+ continue;
+
+ TLI->ComputeConstraintToUse(TC, SDValue());
+
+ Register AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI->getRegForInlineAsmConstraint(
+ TRI, TC.ConstraintCode, TC.ConstraintVT);
+ if (AssignedReg) {
+ // FIXME: This is a workaround for getRegForInlineAsmConstraint
+ // returning VS_32
+ RC = TRI->getPhysRegClass(AssignedReg);
+ }
+
+ // For AGPR constraints null is returned on subtargets without AGPRs, so
+ // assume divergent for null.
+ if (!RC || !TRI->isSGPRClass(RC))
+ return true;
+ }
+
+ return false;
+}
+
/// \returns true if the new GPU divergence analysis is enabled.
bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
return !UseLegacyDA;
@@ -638,7 +684,14 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
// Assume all function calls are a source of divergence.
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ return isInlineAsmSourceOfDivergence(CI);
+ return true;
+ }
+
+ // Assume all function calls are a source of divergence.
+ if (isa<InvokeInst>(V))
return true;
return false;
@@ -656,6 +709,19 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
return true;
}
}
+
+ const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V);
+ if (!ExtValue)
+ return false;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0))) {
+ // If we have inline asm returning mixed SGPR and VGPR results, we inferred
+ // divergent for the overall struct return. We need to override it in the
+ // case we're extracting an SGPR component here.
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+ }
+
return false;
}
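
Seen from a pass that consumes the analysis, the two hooks above combine so that an "=s"-only asm call is no longer reported divergent, while an SGPR element extracted from a mixed "=s,=v" result is reported uniform even though the aggregate call is not. A minimal C++ sketch of querying this through the public TargetTransformInfo interface (illustrative only; the helper name is made up, the two TTI methods are the real entry points that forward to GCNTTIImpl):

// Illustrative only (not part of the commit): observing the new behavior
// through the public TargetTransformInfo hooks, which forward to
// GCNTTIImpl::isSourceOfDivergence / isAlwaysUniform.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
#include <utility>

using namespace llvm;

// Hypothetical helper: for an "=s"-only asm call the first result is false;
// for a mixed "=s,=v" asm call the call itself stays divergent, but an
// extractvalue of the SGPR element reports always-uniform.
static std::pair<bool, bool>
classifyAsmResult(const TargetTransformInfo &TTI, const CallInst &AsmCall,
                  const ExtractValueInst &Element) {
  return {TTI.isSourceOfDivergence(&AsmCall), TTI.isAlwaysUniform(&Element)};
}
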
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 7dd692be5530..dee2e7d314d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -70,7 +70,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
friend BaseT;
const GCNSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
+ const SITargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
bool HasFP32Denormals;
@@ -183,6 +183,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
unsigned getCFInstrCost(unsigned Opcode);
+ bool isInlineAsmSourceOfDivergence(const CallInst *CI,
+ ArrayRef<unsigned> Indices = {}) const;
+
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
bool isAlwaysUniform(const Value *V) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ea40c89ce639..77ae13d60281 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10586,6 +10586,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(RC->getRegister(Idx), RC);
}
}
+
+ // FIXME: Returns VS_32 for physical SGPR constraints
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
new file mode 100644
index 000000000000..8443b82f3888
--- /dev/null
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
@@ -0,0 +1,108 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; Make sure nothing crashes on targets with or without AGPRs
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
+; CHECK-NOT: DIVERGENT
+define i32 @inline_asm_1_sgpr_virtreg_output() {
+ %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
+ ret i32 %sgpr
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
+; CHECK-NOT: DIVERGENT
+define i32 @inline_asm_1_sgpr_physreg_output() {
+ %sgpr = call i32 asm "s_mov_b32 s0, 0", "={s0}"()
+ ret i32 %sgpr
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
+; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
+define i32 @inline_asm_1_vgpr_virtreg_output() {
+ %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
+ ret i32 %vgpr
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
+; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
+define i32 @inline_asm_1_vgpr_physreg_output() {
+ %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
+ ret i32 %vgpr
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
+; CHECK: DIVERGENT: %vgpr = call i32 asm "; def $0", "=a"()
+define i32 @inline_asm_1_agpr_virtreg_output() {
+ %vgpr = call i32 asm "; def $0", "=a"()
+ ret i32 %vgpr
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
+; CHECK: DIVERGENT: %vgpr = call i32 asm "; def a0", "={a0}"()
+define i32 @inline_asm_1_agpr_physreg_output() {
+ %vgpr = call i32 asm "; def a0", "={a0}"()
+ ret i32 %vgpr
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
+; CHECK-NOT: DIVERGENT
+define void @inline_asm_2_sgpr_virtreg_output() {
+ %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s"()
+ %sgpr0 = extractvalue { i32, i32 } %asm, 0
+ %sgpr1 = extractvalue { i32, i32 } %asm, 1
+ store i32 %sgpr0, i32 addrspace(1)* undef
+ store i32 %sgpr1, i32 addrspace(1)* undef
+ ret void
+}
+
+; One output is SGPR, one is VGPR. Infer divergent for the aggregate, but uniform on the SGPR extract
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
+; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"()
+; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
+; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1
+define void @inline_asm_sgpr_vgpr_virtreg_output() {
+ %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"()
+ %sgpr = extractvalue { i32, i32 } %asm, 0
+ %vgpr = extractvalue { i32, i32 } %asm, 1
+ store i32 %sgpr, i32 addrspace(1)* undef
+ store i32 %vgpr, i32 addrspace(1)* undef
+ ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
+; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"()
+; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
+; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
+define void @inline_asm_vgpr_sgpr_virtreg_output() {
+ %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"()
+ %vgpr = extractvalue { i32, i32 } %asm, 0
+ %sgpr = extractvalue { i32, i32 } %asm, 1
+ store i32 %vgpr, i32 addrspace(1)* undef
+ store i32 %sgpr, i32 addrspace(1)* undef
+ ret void
+}
+
+; Have an extra output constraint
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
+; CHECK-NOT: DIVERGENT
+define void @multi_sgpr_inline_asm_output_input_constraint() {
+ %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s,s"(i32 1234)
+ %sgpr0 = extractvalue { i32, i32 } %asm, 0
+ %sgpr1 = extractvalue { i32, i32 } %asm, 1
+ store i32 %sgpr0, i32 addrspace(1)* undef
+ store i32 %sgpr1, i32 addrspace(1)* undef
+ ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
+; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234)
+; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
+; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
+define void @inline_asm_vgpr_sgpr_virtreg_output_input_constraint() {
+ %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234)
+ %vgpr = extractvalue { i32, i32 } %asm, 0
+ %sgpr = extractvalue { i32, i32 } %asm, 1
+ store i32 %vgpr, i32 addrspace(1)* undef
+ store i32 %sgpr, i32 addrspace(1)* undef
+ ret void
+}
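
The last two tests above add an extra input constraint; the constraint walk in isInlineAsmSourceOfDivergence only counts isOutput entries, so inputs such as the trailing "s" in "=s,=s,s" never shift the extract indices. A small standalone sketch of that numbering (illustrative only, not the commit's code):

#include <string>
#include <vector>

// Hypothetical helper mirroring the OutputIdx loop above: constraints that
// start with '=' are outputs and are numbered in order; anything else is an
// input or clobber and is skipped, so "=s,=s,s" still maps extract indices
// 0 and 1 to the two "=s" outputs.
static std::string
outputConstraintFor(const std::vector<std::string> &Constraints,
                    unsigned ExtractIdx) {
  unsigned OutputIdx = 0;
  for (const std::string &C : Constraints) {
    if (C.empty() || C[0] != '=')
      continue; // input or clobber; does not consume an output index
    if (OutputIdx++ == ExtractIdx)
      return C.substr(1); // e.g. "s" or "v"
  }
  return std::string(); // ran past the outputs
}
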
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
index adf786c0e92a..48e12295989d 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
@@ -21,11 +21,30 @@ entry:
}
-; CHECK: {{^}}branch_on_asm:
-; Make sure inline assembly is treted as divergent.
-; CHECK: s_mov_b32 s{{[0-9]+}}, 0
+; CHECK-LABEL: {{^}}branch_on_asm_vgpr:
+; Make sure VGPR inline assembly is treated as divergent.
+; CHECK: v_mov_b32 v{{[0-9]+}}, 0
+; CHECK: v_cmp_eq_u32
; CHECK: s_and_saveexec_b64
-define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @branch_on_asm_vgpr(i32 addrspace(1)* %out) {
+ %zero = call i32 asm "v_mov_b32 $0, 0", "=v"()
+ %cmp = icmp eq i32 %zero, 0
+ br i1 %cmp, label %if, label %endif
+
+if:
+ store i32 0, i32 addrspace(1)* %out
+ br label %endif
+
+endif:
+ ret void
+}
+
+; CHECK-LABEL: {{^}}branch_on_asm_sgpr:
+; Make sure SGPR inline assembly is treated as uniform
+; CHECK: s_mov_b32 s{{[0-9]+}}, 0
+; CHECK: s_cmp_lg_u32
+; CHECK: s_cbranch_scc0
+define amdgpu_kernel void @branch_on_asm_sgpr(i32 addrspace(1)* %out) {
%zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
%cmp = icmp eq i32 %zero, 0
br i1 %cmp, label %if, label %endif