[llvm] r345778 - Check shouldReduceLoadWidth from SimplifySetCC
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 31 14:24:30 PDT 2018
Author: rampitec
Date: Wed Oct 31 14:24:30 2018
New Revision: 345778
URL: http://llvm.org/viewvc/llvm-project?rev=345778&view=rev
Log:
Check shouldReduceLoadWidth from SimplifySetCC
SimplifySetCC could shrink a load without checking with the target
for profitability or legality of such a shrink.
Added checks to prevent shrinking of aligned scalar loads
in AMDGPU below dword, as the scalar engine does not support sub-dword loads.
Differential Revision: https://reviews.llvm.org/D53846
Added:
llvm/trunk/test/CodeGen/AMDGPU/setcc-limit-load-shrink.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=345778&r1=345777&r2=345778&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Oct 31 14:24:30 2018
@@ -2284,7 +2284,8 @@ SDValue TargetLowering::SimplifySetCC(EV
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
- if (newVT.isRound()) {
+ if (newVT.isRound() &&
+ shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=345778&r1=345777&r2=345778&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Oct 31 14:24:30 2018
@@ -667,6 +667,18 @@ bool AMDGPUTargetLowering::shouldReduceL
EVT OldVT = N->getValueType(0);
unsigned OldSize = OldVT.getStoreSizeInBits();
+ MemSDNode *MN = cast<MemSDNode>(N);
+ unsigned AS = MN->getAddressSpace();
+ // Do not shrink an aligned scalar load to sub-dword.
+ // Scalar engine cannot do sub-dword loads.
+ if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
+ (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ (isa<LoadSDNode>(N) &&
+ AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
+ AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
+ return false;
+
// Don't produce extloads from sub 32-bit types. SI doesn't have scalar
// extloads, so doing one requires using a buffer_load. In cases where we
// still couldn't use a scalar load, using the wider load shouldn't really
Added: llvm/trunk/test/CodeGen/AMDGPU/setcc-limit-load-shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc-limit-load-shrink.ll?rev=345778&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc-limit-load-shrink.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc-limit-load-shrink.ll Wed Oct 31 14:24:30 2018
@@ -0,0 +1,65 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}const_load_no_shrink_dword_to_unaligned_byte:
+; GCN: s_load_dword [[LD:s[0-9]+]],
+; GCN: s_bfe_i32 s{{[0-9]+}}, [[LD]], 0x10013
+define amdgpu_kernel void @const_load_no_shrink_dword_to_unaligned_byte(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %x) {
+ %ptr = getelementptr i32, i32 addrspace(4)* %in, i32 %x
+ %load = load i32, i32 addrspace(4)* %ptr, align 4
+ %and = and i32 %load, 524288
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 0, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: const_load_no_shrink_dword_to_aligned_byte:
+; GCN: s_load_dword [[LD:s[0-9]+]],
+; GCN: s_bfe_i32 s{{[0-9]+}}, [[LD]], 0x10003
+define amdgpu_kernel void @const_load_no_shrink_dword_to_aligned_byte(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %x) {
+ %ptr = getelementptr i32, i32 addrspace(4)* %in, i32 %x
+ %load = load i32, i32 addrspace(4)* %ptr, align 4
+ %and = and i32 %load, 8
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 0, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: global_load_no_shrink_dword_to_unaligned_byte:
+; GCN: s_load_dword [[LD:s[0-9]+]],
+; GCN: s_bfe_i32 s{{[0-9]+}}, [[LD]], 0x10013
+define amdgpu_kernel void @global_load_no_shrink_dword_to_unaligned_byte(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %x) {
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %x
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
+ %and = and i32 %load, 524288
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 0, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: global_load_no_shrink_dword_to_aligned_byte:
+; GCN: s_load_dword [[LD:s[0-9]+]],
+; GCN: s_bfe_i32 s{{[0-9]+}}, [[LD]], 0x10003
+define amdgpu_kernel void @global_load_no_shrink_dword_to_aligned_byte(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %x) {
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %x
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
+ %and = and i32 %load, 8
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 0, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: const_load_shrink_dword_to_unaligned_byte:
+; GCN: global_load_ushort
+define amdgpu_kernel void @const_load_shrink_dword_to_unaligned_byte(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %x) {
+ %ptr = getelementptr i32, i32 addrspace(4)* %in, i32 %x
+ %load = load i32, i32 addrspace(4)* %ptr, align 2
+ %and = and i32 %load, 524288
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 0, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
More information about the llvm-commits
mailing list