[PATCH] D138885: AMDGPU: Fix creating illegal f16 fp_class

Mon Nov 28 17:58:23 PST 2022

arsenm created this revision.
arsenm added reviewers: AMDGPU, JanekvO, Pierre-vh, foad, rampitec.
Herald added subscribers: kosarev, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

The combines that create fp_class nodes were missing type legality
checks, so they could produce an illegal f16 fp_class. This broke the
device library build for targets without f16 support. Technically, the
first pattern isn't tested by this patch; it only triggers if the
isBeforeLegalize check in performAndCombine is removed, and I'm not
sure how to trick that pattern into appearing post-legalization.


https://reviews.llvm.org/D138885

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/fp-classify.ll


Index: llvm/test/CodeGen/AMDGPU/fp-classify.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -180,5 +180,68 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}test_isinf_pattern_f16:
+; SI-DAG: s_mov_b32 [[INF:s[0-9]+]], 0x7f800000
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |s{{[0-9]+}}|
+; SI: v_cmp_eq_f32_e32 vcc, [[INF]], [[CVT]]
+; SI-NEXT: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+
+; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}}
+; VI: v_cmp_class_f16_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; VI-NOT: v_cmp
+
+; GCN: s_endpgm
+define amdgpu_kernel void @test_isinf_pattern_f16(i32 addrspace(1)* nocapture %out, half %x) #0 {
+  %fabs = tail call half @llvm.fabs.f16(half %x) #1
+  %cmp = fcmp oeq half %fabs, 0xH7C00
+  %ext = zext i1 %cmp to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_isfinite_pattern_0_f16:
+; SI-DAG: s_movk_i32 [[MASK:s[0-9]+]], 0x1f8
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
+; SI: v_cmp_class_f32_e64 [[CLASS:s\[[0-9]+:[0-9]+\]]], [[CVT]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CLASS]]
+
+; VI-NOT: v_cmp
+; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}}
+; VI: v_cmp_class_f16_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; VI-NOT: v_cmp
+
+; GCN: s_endpgm
+define amdgpu_kernel void @test_isfinite_pattern_0_f16(i32 addrspace(1)* nocapture %out, half %x) #0 {
+  %ord = fcmp ord half %x, 0.0
+  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
+  %ninf = fcmp une half %x.fabs, 0xH7C00
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_isfinite_pattern_4_f16:
+; SI-DAG: s_movk_i32 [[MASK:s[0-9]+]], 0x1f8
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
+; SI: v_cmp_class_f32_e64 [[CLASS:s\[[0-9]+:[0-9]+\]]], [[CVT]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CLASS]]
+
+; VI-DAG: s_load_dword [[X:s[0-9]+]]
+; VI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8
+; VI: v_cmp_class_f16_e32 vcc, [[X]], [[MASK]]
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+define amdgpu_kernel void @test_isfinite_pattern_4_f16(i32 addrspace(1)* nocapture %out, half %x) #0 {
+  %ord = fcmp ord half %x, 0.0
+  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
+  %ninf = fcmp one half %x.fabs, 0xH7C00
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+declare half @llvm.fabs.f16(half) #1
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9676,7 +9676,8 @@
 
     SDValue X = LHS.getOperand(0);
     SDValue Y = RHS.getOperand(0);
-    if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
+    if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X ||
+        !isTypeLegal(X.getValueType()))
       return SDValue();
 
     if (LCC == ISD::SETO) {
@@ -11438,8 +11439,8 @@
     }
   }
 
-  if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
-                                           VT != MVT::f16))
+  if (VT != MVT::f32 && VT != MVT::f64 &&
+      (!Subtarget->has16BitInsts() || VT != MVT::f16))
     return SDValue();
 
   // Match isinf/isfinite pattern


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138885.478432.patch
Type: text/x-patch
Size: 3526 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221129/67fcde80/attachment.bin>