[PATCH] D23477: [NVPTX] Use ldg for explicitly invariant loads.
Justin Lebar via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 10 18:47:41 PDT 2016
This revision was automatically updated to reflect the committed changes.
Closed by commit rL281152: [NVPTX] Use ldg for explicitly invariant loads. (authored by jlebar).
Changed prior to commit:
https://reviews.llvm.org/D23477?vs=67941&id=70958#toc
Repository:
rL LLVM
https://reviews.llvm.org/D23477
Files:
llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/NVPTX/ldg-invariant.ll
Index: llvm/trunk/test/CodeGen/NVPTX/ldg-invariant.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/ldg-invariant.ll
+++ llvm/trunk/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+; Check that invariant loads from the global addrspace are lowered to
+; ld.global.nc.
+
+; CHECK-LABEL: @ld_global
+define i32 @ld_global(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.nc.{{[a-z]}}32
+ %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
+ ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_invariant
+define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.{{[a-z]}}32
+ %a = load i32, i32 addrspace(1)* %ptr
+ ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_global_addrspace
+define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
+; CHECK: ld.{{[a-z]}}32
+ %a = load i32, i32 addrspace(0)* %ptr
+ ret i32 %a
+}
+
+!0 = !{}
Index: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -558,21 +558,30 @@
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
- // To use non-coherent caching, the load has to be from global
- // memory and we have to prove that the memory area is not written
- // to anywhere for the duration of the kernel call, not even after
- // the load.
+ // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
+ // space.
//
- // To ensure that there are no writes to the memory, we require the
- // underlying pointer to be a noalias (__restrict) kernel parameter
- // that is never used for a write. We can only do this for kernel
- // functions since from within a device function, we cannot know if
- // there were or will be writes to the memory from the caller - or we
- // could, but then we would have to do inter-procedural analysis.
- if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
- !isKernelFunction(*F->getFunction())) {
+ // We have two ways of identifying invariant loads: Loads may be explicitly
+ // marked as invariant, or we may infer them to be invariant.
+ //
+ // We currently infer invariance only for kernel function pointer params that
+ // are noalias (i.e. __restrict) and never written to.
+ //
+ // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
+ // not during the SelectionDAG phase).
+ //
+ // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
+ // explicitly invariant loads because this is how clang tells us to use ldg
+ // when the user uses a builtin.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
+ return false;
+
+ if (N->isInvariant())
+ return true;
+
+ // Load wasn't explicitly invariant. Attempt to infer invariance.
+ if (!isKernelFunction(*F->getFunction()))
return false;
- }
// We use GetUnderlyingObjects() here instead of
// GetUnderlyingObject() mainly because the former looks through phi
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23477.70958.patch
Type: text/x-patch
Size: 3273 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160911/d4dc6507/attachment.bin>
More information about the llvm-commits
mailing list