[PATCH] D50468: AMDGPU: More canonicalized operations

Wed Aug 8 11:42:22 PDT 2018

arsenm created this revision.
arsenm added a reviewer: rampitec.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.

https://reviews.llvm.org/D50468

Files:
  lib/Target/AMDGPU/AMDGPUISelLowering.cpp
  lib/Target/AMDGPU/SIISelLowering.cpp
  test/CodeGen/AMDGPU/fcanonicalize-elimination.ll


Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
===================================================================

--- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -837,6 +837,26 @@
   ret <2 x half> %canonicalized
 }
 
+; GCN-LABEL: {{^}}v_test_canonicalize_cubeid:
+; GCN: s_waitcnt
+; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
+  %cvt = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
+  %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
+  ret float %canonicalized
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_frexp_mant:
+; GCN: s_waitcnt
+; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_frexp_mant(float %a) {
+  %cvt = call float @llvm.amdgcn.frexp.mant.f32(float %a)
+  %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
+  ret float %canonicalized
+}
+
 ; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
 ; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
 ; CHECK: .amd_amdgpu_isa
@@ -863,6 +883,8 @@
 declare float @llvm.maxnum.f32(float, float) #0
 declare double @llvm.maxnum.f64(double, double) #0
 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
+declare float @llvm.amdgcn.cubeid(float, float, float) #0
+declare float @llvm.amdgcn.frexp.mant.f32(float) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6832,6 +6832,10 @@
   case AMDGPUISD::FRACT:
   case AMDGPUISD::LDEXP:
   case AMDGPUISD::CVT_PKRTZ_F16_F32:
+  case AMDGPUISD::CVT_F32_UBYTE0:
+  case AMDGPUISD::CVT_F32_UBYTE1:
+  case AMDGPUISD::CVT_F32_UBYTE2:
+  case AMDGPUISD::CVT_F32_UBYTE3:
     return true;
 
   // It can/will be lowered or combined as a bit operation.
@@ -6911,6 +6915,9 @@
     // TODO: Handle more intrinsics
     switch (IntrinsicID) {
     case Intrinsic::amdgcn_cvt_pkrtz:
+    case Intrinsic::amdgcn_cubeid:
+    case Intrinsic::amdgcn_frexp_mant:
+    case Intrinsic::amdgcn_fdot2:
       return true;
     default:
       break;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4395,10 +4395,20 @@
     case Intrinsic::amdgcn_cubeid:
       return true;
 
-    case Intrinsic::amdgcn_frexp_mant:
+    case Intrinsic::amdgcn_frexp_mant: {
       if (SNaN)
         return true;
       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+    }
+    case Intrinsic::amdgcn_cvt_pkrtz: {
+      if (SNaN)
+        return true;
+      return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+             DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+    }
+    case Intrinsic::amdgcn_fdot2:
+      // TODO: Refine on operand
+      return SNaN;
     default:
       return false;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D50468.159766.patch
Type: text/x-patch
Size: 3228 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180808/65497b03/attachment.bin>