[PATCH] D88574: AMDGPU/SelectionDAG Include fast-math-flags for fmed3 intrinsic
Petar Avramovic via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 30 07:41:18 PDT 2020
Petar.Avramovic created this revision.
Petar.Avramovic added reviewers: foad, arsenm.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a project: LLVM.
Petar.Avramovic requested review of this revision.
Herald added a subscriber: wdng.
Copy fast-math flags from the IR onto target intrinsic nodes in SelectionDAGBuilder.
Also propagate these flags when the amdgcn_fmed3 intrinsic is lowered to the FMED3 node.
https://reviews.llvm.org/D88574
Files:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/clamp.ll
Index: llvm/test/CodeGen/AMDGPU/clamp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/clamp.ll
+++ llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -336,7 +336,7 @@
; GCN-LABEL: {{^}}v_clamp_nnan_med3_ayb_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
-; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
+; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_nnan_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6620,8 +6620,8 @@
case Intrinsic::amdgcn_ballot:
return lowerBALLOTIntrinsic(*this, Op.getNode(), DAG);
case Intrinsic::amdgcn_fmed3:
- return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ return DAG.getNode(AMDGPUISD::FMED3, DL, VT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op->getFlags());
case Intrinsic::amdgcn_fdot2:
return DAG.getNode(AMDGPUISD::FDOT2, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -757,7 +757,8 @@
void visitInlineAsm(const CallBase &Call);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
- void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic,
+ SDNodeFlags Flags);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVAStart(const CallInst &I);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4602,7 +4602,8 @@
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
+ unsigned Intrinsic,
+ SDNodeFlags Flags) {
// Ignore the callsite's attributes. A specific call site may be marked with
// readnone, but the lowering code will expect the chain based on the
// definition.
@@ -4679,6 +4680,7 @@
} else {
Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
}
+ Result->setFlags(Flags);
if (HasChain) {
SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
@@ -5611,7 +5613,7 @@
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
- visitTargetIntrinsic(I, Intrinsic);
+ visitTargetIntrinsic(I, Intrinsic, Flags);
return;
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88574.295281.patch
Type: text/x-patch
Size: 3443 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200930/9f4c9369/attachment.bin>
More information about the llvm-commits
mailing list