[PATCH] D22748: AMDGPU: override shouldNormalizeToSelectSequence
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 25 03:33:50 PDT 2016
nhaehnle created this revision.
nhaehnle added reviewers: arsenm, tstellarAMD.
nhaehnle added a subscriber: llvm-commits.
Herald added subscribers: kzhuravl, arsenm.
Prefer to keep logic operations (and/or) on i1 flags so that they are lowered
to the corresponding SALU instructions (s_and_b64 / s_or_b64) instead of an
equivalent sequence of v_cndmask instructions. The idea is to get a better
balance of SALU and VALU instructions, especially when combined with
https://reviews.llvm.org/D22747.
https://reviews.llvm.org/D22748
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIISelLowering.h
test/CodeGen/AMDGPU/select-andor.ll
Index: test/CodeGen/AMDGPU/select-andor.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/select-andor.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}select_and:
+; CHECK: v_cmp_lt
+; CHECK-NEXT: v_cmp_lt
+; CHECK-NEXT: s_and_b64
+; CHECK-NEXT: v_cndmask
+define amdgpu_vs float @select_and(i32 %cond1, i32 %cond2, float %a, float %b) nounwind {
+ %cc1 = icmp ugt i32 %cond1, 5
+ %cc2 = icmp ugt i32 %cond2, 7
+ %cc = and i1 %cc1, %cc2
+ %sel = select i1 %cc, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: {{^}}select_or:
+; CHECK: v_cmp_lt
+; CHECK-NEXT: v_cmp_lt
+; CHECK-NEXT: s_or_b64
+; CHECK-NEXT: v_cndmask
+define amdgpu_vs float @select_or(i32 %cond1, i32 %cond2, float %a, float %b) nounwind {
+ %cc1 = icmp ugt i32 %cond1, 5
+ %cc2 = icmp ugt i32 %cond2, 7
+ %cc = or i1 %cc1, %cc2
+ %sel = select i1 %cc, float %a, float %b
+ ret float %sel
+}
Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -110,6 +110,8 @@
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
+
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1828,6 +1828,12 @@
!shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
}
+bool SITargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+ EVT) const {
+ // Prefer to keep i1 flags around so that boolean logic is done with SALU.
+ return false;
+}
+
static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
SDLoc DL, unsigned Offset, EVT PtrVT,
unsigned GAFlags = SIInstrInfo::MO_NONE) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D22748.65323.patch
Type: text/x-patch
Size: 2412 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160725/f1548032/attachment.bin>
More information about the llvm-commits
mailing list