[PATCH] D50468: AMDGPU: More canonicalized operations
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 8 11:42:22 PDT 2018
arsenm created this revision.
arsenm added a reviewer: rampitec.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
https://reviews.llvm.org/D50468
Files:
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -837,6 +837,26 @@
ret <2 x half> %canonicalized
}
+; GCN-LABEL: {{^}}v_test_canonicalize_cubeid:
+; GCN: s_waitcnt
+; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
+ %cvt = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
+ %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
+ ret float %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_frexp_mant:
+; GCN: s_waitcnt
+; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_frexp_mant(float %a) {
+ %cvt = call float @llvm.amdgcn.frexp.mant.f32(float %a)
+ %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
+ ret float %canonicalized
+}
+
; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; CHECK: .amd_amdgpu_isa
@@ -863,6 +883,8 @@
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
+declare float @llvm.amdgcn.cubeid(float, float, float) #0
+declare float @llvm.amdgcn.frexp.mant.f32(float) #0
attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6832,6 +6832,10 @@
case AMDGPUISD::FRACT:
case AMDGPUISD::LDEXP:
case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
return true;
// It can/will be lowered or combined as a bit operation.
@@ -6911,6 +6915,9 @@
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cvt_pkrtz:
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_fdot2:
return true;
default:
break;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4395,10 +4395,20 @@
case Intrinsic::amdgcn_cubeid:
return true;
- case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_frexp_mant: {
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_fdot2:
+ // TODO: Refine on operand
+ return SNaN;
default:
return false;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D50468.159766.patch
Type: text/x-patch
Size: 3228 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180808/65497b03/attachment.bin>
More information about the llvm-commits mailing list