[llvm] r334882 - [AMDGPU] setcc (select cc, CT, CF), CF, eq | ne -> xor cc, -1 | cc
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 15 20:46:59 PDT 2018
Author: rampitec
Date: Fri Jun 15 20:46:59 2018
New Revision: 334882
URL: http://llvm.org/viewvc/llvm-project?rev=334882&view=rev
Log:
[AMDGPU] setcc (select cc, CT, CF), CF, eq | ne -> xor cc, -1 | cc
This is a common case in the backend when we serialize a condition and then
rematerialize it. Use either the original or the inverted condition.
Differential Revision: https://reviews.llvm.org/D48246
Added:
llvm/trunk/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=334882&r1=334881&r2=334882&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jun 15 20:46:59 2018
@@ -7490,23 +7490,49 @@ SDValue SITargetLowering::performSetCCCo
}
}
- if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
- isBoolSGPR(LHS.getOperand(0))) {
- // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
- // setcc (sext from i1 cc), -1, eq|sle|uge) => cc
- // setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
- // setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
- if ((CRHS->isAllOnesValue() &&
- (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
- (CRHS->isNullValue() &&
- (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
- return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
- DAG.getConstant(-1, SL, MVT::i1));
- if ((CRHS->isAllOnesValue() &&
- (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
- (CRHS->isNullValue() &&
- (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
- return LHS.getOperand(0);
+ if (CRHS) {
+ if (VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
+ isBoolSGPR(LHS.getOperand(0))) {
+ // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
+ // setcc (sext from i1 cc), -1, eq|sle|uge) => cc
+ // setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
+ // setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
+ if ((CRHS->isAllOnesValue() &&
+ (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
+ (CRHS->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
+ return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(-1, SL, MVT::i1));
+ if ((CRHS->isAllOnesValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
+ (CRHS->isNullValue() &&
+ (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
+ return LHS.getOperand(0);
+ }
+
+ uint64_t CRHSVal = CRHS->getZExtValue();
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ LHS.getOpcode() == ISD::SELECT &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ isa<ConstantSDNode>(LHS.getOperand(2)) &&
+ LHS.getConstantOperandVal(1) != LHS.getConstantOperandVal(2) &&
+ isBoolSGPR(LHS.getOperand(0))) {
+ // Given CT != CF:
+ // setcc (select cc, CT, CF), CF, eq => xor cc, -1
+ // setcc (select cc, CT, CF), CF, ne => cc
+ // setcc (select cc, CT, CF), CT, ne => xor cc, -1
+ // setcc (select cc, CT, CF), CT, eq => cc
+ uint64_t CT = LHS.getConstantOperandVal(1);
+ uint64_t CF = LHS.getConstantOperandVal(2);
+
+ if ((CF == CRHSVal && CC == ISD::SETEQ) ||
+ (CT == CRHSVal && CC == ISD::SETNE))
+ return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(-1, SL, MVT::i1));
+ if ((CF == CRHSVal && CC == ISD::SETNE) ||
+ (CT == CRHSVal && CC == ISD::SETEQ))
+ return LHS.getOperand(0);
+ }
}
if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
Added: llvm/trunk/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll?rev=334882&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll Fri Jun 15 20:46:59 2018
@@ -0,0 +1,104 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -O0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}eq_t:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[TWO]], [[FOUR]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @eq_t(float %x) {
+ %c1 = fcmp olt float %x, 1.0
+ %s1 = select i1 %c1, i32 56789, i32 1
+ %c2 = icmp eq i32 %s1, 56789
+ %s2 = select i1 %c2, float 4.0, float 2.0
+ store float %s2, float* undef, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}ne_t:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[FOUR]], [[TWO]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @ne_t(float %x) {
+ %c1 = fcmp olt float %x, 1.0
+ %s1 = select i1 %c1, i32 56789, i32 1
+ %c2 = icmp ne i32 %s1, 56789
+ %s2 = select i1 %c2, float 4.0, float 2.0
+ store float %s2, float* undef, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}eq_f:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[FOUR]], [[TWO]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @eq_f(float %x) {
+ %c1 = fcmp olt float %x, 1.0
+ %s1 = select i1 %c1, i32 1, i32 56789
+ %c2 = icmp eq i32 %s1, 56789
+ %s2 = select i1 %c2, float 4.0, float 2.0
+ store float %s2, float* undef, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}ne_f:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[TWO]], [[FOUR]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @ne_f(float %x) {
+ %c1 = fcmp olt float %x, 1.0
+ %s1 = select i1 %c1, i32 1, i32 56789
+ %c2 = icmp ne i32 %s1, 56789
+ %s2 = select i1 %c2, float 4.0, float 2.0
+ store float %s2, float* undef, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}different_constants:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN-DAG: v_cmp_lt_f32_e{{32|64}} [[CC1:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-DAG: v_cndmask_b32_e{{32|64}} [[CND1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]]
+; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC2:s\[[0-9]+:[0-9]+\]|vcc]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[TWO]], [[FOUR]], [[CC2]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @different_constants(float %x) {
+ %c1 = fcmp olt float %x, 1.0
+ %s1 = select i1 %c1, i32 56789, i32 1
+ %c2 = icmp eq i32 %s1, 5678
+ %s2 = select i1 %c2, float 4.0, float 2.0
+ store float %s2, float* undef, align 4
+ ret void
+}
More information about the llvm-commits
mailing list