[llvm] r291792 - AMDGPU: Skip fneg/select combine if it can fold into other operations
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 12 10:58:15 PST 2017
Author: arsenm
Date: Thu Jan 12 12:58:15 2017
New Revision: 291792
URL: http://llvm.org/viewvc/llvm-project?rev=291792&view=rev
Log:
AMDGPU: Skip fneg/select combine if it can fold into other operations
Added:
llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=291792&r1=291791&r2=291792&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Thu Jan 12 12:58:15 2017
@@ -484,6 +484,24 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
// Target Information
//===----------------------------------------------------------------------===//
+static bool fnegFoldsIntoOp(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FMA:
+ case ISD::FMAD:
+ case ISD::FSIN:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::SIN_HW:
+ case AMDGPUISD::FMUL_LEGACY:
+ return true;
+ default:
+ return false;
+ }
+}
+
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
@@ -2738,20 +2756,31 @@ static SDValue foldFreeOpFromSelect(Targ
SDValue NewLHS = LHS.getOperand(0);
SDValue NewRHS = RHS;
- // TODO: Skip for operations where other combines can absord the fneg.
-
- if (LHS.getOpcode() == ISD::FNEG)
- NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
- else if (CRHS->isNegative())
- return SDValue();
+ // Careful: if the neg can be folded up, don't try to pull it back down.
+ bool ShouldFoldNeg = true;
- if (Inv)
- std::swap(NewLHS, NewRHS);
+ if (NewLHS.hasOneUse()) {
+ unsigned Opc = NewLHS.getOpcode();
+ if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
+ ShouldFoldNeg = false;
+ if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
+ ShouldFoldNeg = false;
+ }
- SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
- Cond, NewLHS, NewRHS);
- DCI.AddToWorklist(NewSelect.getNode());
- return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+ if (ShouldFoldNeg) {
+ if (LHS.getOpcode() == ISD::FNEG)
+ NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+ else if (CRHS->isNegative())
+ return SDValue();
+
+ if (Inv)
+ std::swap(NewLHS, NewRHS);
+
+ SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
+ Cond, NewLHS, NewRHS);
+ DCI.AddToWorklist(NewSelect.getNode());
+ return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+ }
}
return SDValue();
@@ -2806,24 +2835,6 @@ SDValue AMDGPUTargetLowering::performSel
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
}
-static bool fnegFoldsIntoOp(unsigned Opc) {
- switch (Opc) {
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FMA:
- case ISD::FMAD:
- case ISD::FSIN:
- case AMDGPUISD::RCP:
- case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::SIN_HW:
- case AMDGPUISD::FMUL_LEGACY:
- return true;
- default:
- return false;
- }
-}
-
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Added: llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll?rev=291792&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll Thu Jan 12 12:58:15 2017
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+; --------------------------------------------------------------------------------
+; Don't fold if fneg can fold into the source
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 {
+ %x = load volatile float, float addrspace(1)* undef
+ %y = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %rcp = call float @llvm.amdgcn.rcp.legacy(float %x)
+ %fneg = fsub float -0.0, %rcp
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
+ %x = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %x, float 4.0)
+ %fneg = fsub float -0.0, %mul
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+declare float @llvm.amdgcn.rcp.legacy(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll?rev=291792&r1=291791&r2=291792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll Thu Jan 12 12:58:15 2017
@@ -721,7 +721,120 @@ define void @mul_select_negk_negfabs_f32
ret void
}
+; --------------------------------------------------------------------------------
+; Don't fold if fneg can fold into the source
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_add_f32(i32 %c) #0 {
+ %x = load volatile float, float addrspace(1)* undef
+ %y = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %add = fadd float %x, 4.0
+ %fneg = fsub float -0.0, %add
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
+ %x = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %add = fsub float %x, 4.0
+ %fneg = fsub float -0.0, %add
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
+ %x = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %mul = fmul float %x, 4.0
+ %fneg = fsub float -0.0, %mul
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Z:v[0-9]+]]
+
+; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
+ %x = load volatile float, float addrspace(1)* undef
+ %z = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
+ %fneg = fsub float -0.0, %fma
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Z:v[0-9]+]]
+
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
+ %x = load volatile float, float addrspace(1)* undef
+ %z = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
+ %fneg = fsub float -0.0, %fmad
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
+; FIXME: This one should fold to rcp
+; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
+ %x = load volatile float, float addrspace(1)* undef
+ %y = load volatile float, float addrspace(1)* undef
+ %cmp = icmp eq i32 %c, 0
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
+ %fneg = fsub float -0.0, %rcp
+ %select = select i1 %cmp, float %fneg, float 2.0
+ store volatile float %select, float addrspace(1)* undef
+ ret void
+}
+
declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare float @llvm.amdgcn.rcp.f32(float) #1
+declare float @llvm.amdgcn.rcp.legacy(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
More information about the llvm-commits mailing list