[llvm] r307861 - AMDGPU: Fix converting unanalyzable global loads to SMRD
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 12 16:06:19 PDT 2017
Author: arsenm
Date: Wed Jul 12 16:06:18 2017
New Revision: 307861
URL: http://llvm.org/viewvc/llvm-project?rev=307861&view=rev
Log:
AMDGPU: Fix converting unanalyzable global loads to SMRD
Not all memory dependence queries succeed, so the clobber check needs to
be conservative and treat an unknown query result as a clobber.
Added:
llvm/trunk/test/CodeGen/AMDGPU/global-smrd-unknown.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp?rev=307861&r1=307860&r2=307861&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp Wed Jul 12 16:06:18 2017
@@ -108,10 +108,11 @@ bool AMDGPUAnnotateUniformValues::isClob
DFS(Start, Checklist);
for (auto &BB : Checklist) {
BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
- BasicBlock::iterator(Load) : BB->end();
- if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
- true, StartIt, BB, Load).isClobber())
- return true;
+ BasicBlock::iterator(Load) : BB->end();
+ auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
+ StartIt, BB, Load);
+ if (Q.isClobber() || Q.isUnknown())
+ return true;
}
return false;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/global-smrd-unknown.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-smrd-unknown.ll?rev=307861&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-smrd-unknown.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-smrd-unknown.ll Wed Jul 12 16:06:18 2017
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -memdep-block-scan-limit=1 -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}unknown_memdep_analysis:
+; GCN: flat_load_dword
+; GCN: flat_load_dword
+; GCN: flat_store_dword
+define amdgpu_kernel void @unknown_memdep_analysis(float addrspace(1)* nocapture readonly %arg) #0 {
+bb:
+ %tmp53 = load float, float addrspace(1)* undef, align 4
+ %tmp54 = getelementptr inbounds float, float addrspace(1)* %arg, i32 31
+ %tmp55 = load float, float addrspace(1)* %tmp54, align 4
+ %tmp56 = tail call float @llvm.fmuladd.f32(float undef, float %tmp53, float %tmp55)
+ store float %tmp56, float addrspace(1)* undef, align 4
+ ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
More information about the llvm-commits mailing list